From 4ab4fa1032170b6ec50ffff0767096b3200dde8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Tue, 20 Aug 2019 15:33:23 -0700 Subject: [PATCH 001/331] drm/i915/psr: Make PSR registers relative to transcoders MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PSR registers are a mess, some have the full address while others just have the additional offset from psr_mmio_base. For BDW+ psr_mmio_base is nothing more than TRANSCODER_EDP_OFFSET + 0x800 and using it makes more difficult for people with an PSR register address or PSR register name from from BSpec as i915 also don't match the BSpec names. For HSW psr_mmio_base is _DDI_BUF_CTL_A + 0x800 and PSR registers are only available in DDIA. Other reason to make relative to transcoder is that since BDW every transcoder have PSR registers, so in theory it should be possible to have PSR enabled in a non-eDP transcoder. So for BDW+ we can use _TRANS2() to get the register offset of any PSR register in any transcoder while for HSW we have _HSW_PSR_ADJ that will calculate the register offset for the single PSR instance, noting that we are already guarded about trying to enable PSR in other port than DDIA on HSW by the 'if (dig_port->base.port != PORT_A)' in intel_psr_compute_config(), this check should only be valid for HSW and will be changed in future. PSR2 registers and PSR_EVENT was added after Haswell so that is why _PSR_ADJ() is not used in some macros. The only registers that can not be relative to transcoder are PSR_IMR and PSR_IIR that are not relative to anything, so keeping it hardcoded. That changed for TGL but it will be handled in another patch. Also removing BDW_EDP_PSR_BASE from GVT because it is not used as it is the only PSR register that GVT have. v5: - Macros changed to be more explicit about HSW (Dhinakaran) - Squashed with the patch that added the tran parameter to the macros (Dhinakaran) v6: - Checking for interruption errors after module reload in the transcoder that will be used (Dhinakaran) - Using lowercase to the registers offsets v7: - Removing IS_HASWELL() from registers macros(Jani) Cc: Dhinakaran Pandiyan Cc: Rodrigo Vivi Cc: Jani Nikula Cc: Ville Syrjälä Cc: Zhi Wang Reviewed-by: Lucas De Marchi Signed-off-by: José Roberto de Souza Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20190820223325.27490-1-jose.souza@intel.com --- drivers/gpu/drm/i915/display/intel_psr.c | 104 +++++++++++++---------- drivers/gpu/drm/i915/gvt/handlers.c | 2 +- drivers/gpu/drm/i915/i915_debugfs.c | 18 ++-- drivers/gpu/drm/i915/i915_drv.h | 5 +- drivers/gpu/drm/i915/i915_reg.h | 57 +++++++++---- 5 files changed, 113 insertions(+), 73 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 3bfb720560c2f..77232f6bca176 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -390,7 +390,7 @@ static void hsw_psr_setup_aux(struct intel_dp *intel_dp) BUILD_BUG_ON(sizeof(aux_msg) > 20); for (i = 0; i < sizeof(aux_msg); i += 4) - I915_WRITE(EDP_PSR_AUX_DATA(i >> 2), + I915_WRITE(EDP_PSR_AUX_DATA(dev_priv->psr.transcoder, i >> 2), intel_dp_pack_aux(&aux_msg[i], sizeof(aux_msg) - i)); aux_clock_divider = intel_dp->get_aux_clock_divider(intel_dp, 0); @@ -401,7 +401,7 @@ static void hsw_psr_setup_aux(struct intel_dp *intel_dp) /* Select only valid bits for SRD_AUX_CTL */ aux_ctl &= psr_aux_mask; - I915_WRITE(EDP_PSR_AUX_CTL, aux_ctl); + I915_WRITE(EDP_PSR_AUX_CTL(dev_priv->psr.transcoder), aux_ctl); } static void intel_psr_enable_sink(struct intel_dp *intel_dp) @@ -491,8 +491,9 @@ static void hsw_activate_psr1(struct intel_dp *intel_dp) if (INTEL_GEN(dev_priv) >= 8) val |= EDP_PSR_CRC_ENABLE; - val |= I915_READ(EDP_PSR_CTL) & EDP_PSR_RESTORE_PSR_ACTIVE_CTX_MASK; - I915_WRITE(EDP_PSR_CTL, val); + val |= (I915_READ(EDP_PSR_CTL(dev_priv->psr.transcoder)) & + EDP_PSR_RESTORE_PSR_ACTIVE_CTX_MASK); + I915_WRITE(EDP_PSR_CTL(dev_priv->psr.transcoder), val); } static void hsw_activate_psr2(struct intel_dp *intel_dp) @@ -528,9 +529,9 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp) * PSR2 HW is incorrectly using EDP_PSR_TP1_TP3_SEL and BSpec is * recommending keep this bit unset while PSR2 is enabled. */ - I915_WRITE(EDP_PSR_CTL, 0); + I915_WRITE(EDP_PSR_CTL(dev_priv->psr.transcoder), 0); - I915_WRITE(EDP_PSR2_CTL, val); + I915_WRITE(EDP_PSR2_CTL(dev_priv->psr.transcoder), val); } static bool intel_psr2_config_valid(struct intel_dp *intel_dp, @@ -606,10 +607,9 @@ void intel_psr_compute_config(struct intel_dp *intel_dp, /* * HSW spec explicitly says PSR is tied to port A. - * BDW+ platforms with DDI implementation of PSR have different - * PSR registers per transcoder and we only implement transcoder EDP - * ones. Since by Display design transcoder EDP is tied to port A - * we can safely escape based on the port A. + * BDW+ platforms have a instance of PSR registers per transcoder but + * for now it only supports one instance of PSR, so lets keep it + * hardcoded to PORT_A */ if (dig_port->base.port != PORT_A) { DRM_DEBUG_KMS("PSR condition failed: Port not supported\n"); @@ -649,8 +649,8 @@ static void intel_psr_activate(struct intel_dp *intel_dp) struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); if (INTEL_GEN(dev_priv) >= 9) - WARN_ON(I915_READ(EDP_PSR2_CTL) & EDP_PSR2_ENABLE); - WARN_ON(I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE); + WARN_ON(I915_READ(EDP_PSR2_CTL(dev_priv->psr.transcoder)) & EDP_PSR2_ENABLE); + WARN_ON(I915_READ(EDP_PSR_CTL(dev_priv->psr.transcoder)) & EDP_PSR_ENABLE); WARN_ON(dev_priv->psr.active); lockdep_assert_held(&dev_priv->psr.lock); @@ -720,19 +720,37 @@ static void intel_psr_enable_source(struct intel_dp *intel_dp, if (INTEL_GEN(dev_priv) < 11) mask |= EDP_PSR_DEBUG_MASK_DISP_REG_WRITE; - I915_WRITE(EDP_PSR_DEBUG, mask); + I915_WRITE(EDP_PSR_DEBUG(dev_priv->psr.transcoder), mask); } static void intel_psr_enable_locked(struct drm_i915_private *dev_priv, const struct intel_crtc_state *crtc_state) { struct intel_dp *intel_dp = dev_priv->psr.dp; + u32 val; WARN_ON(dev_priv->psr.enabled); dev_priv->psr.psr2_enabled = intel_psr2_enabled(dev_priv, crtc_state); dev_priv->psr.busy_frontbuffer_bits = 0; dev_priv->psr.pipe = to_intel_crtc(crtc_state->base.crtc)->pipe; + dev_priv->psr.transcoder = crtc_state->cpu_transcoder; + + /* + * If a PSR error happened and the driver is reloaded, the EDP_PSR_IIR + * will still keep the error set even after the reset done in the + * irq_preinstall and irq_uninstall hooks. + * And enabling in this situation cause the screen to freeze in the + * first time that PSR HW tries to activate so lets keep PSR disabled + * to avoid any rendering problems. + */ + val = I915_READ(EDP_PSR_IIR); + val &= EDP_PSR_ERROR(edp_psr_shift(dev_priv->psr.transcoder)); + if (val) { + dev_priv->psr.sink_not_reliable = true; + DRM_DEBUG_KMS("PSR interruption error set, not enabling PSR\n"); + return; + } DRM_DEBUG_KMS("Enabling PSR%s\n", dev_priv->psr.psr2_enabled ? "2" : "1"); @@ -782,20 +800,27 @@ static void intel_psr_exit(struct drm_i915_private *dev_priv) u32 val; if (!dev_priv->psr.active) { - if (INTEL_GEN(dev_priv) >= 9) - WARN_ON(I915_READ(EDP_PSR2_CTL) & EDP_PSR2_ENABLE); - WARN_ON(I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE); + if (INTEL_GEN(dev_priv) >= 9) { + val = I915_READ(EDP_PSR2_CTL(dev_priv->psr.transcoder)); + WARN_ON(val & EDP_PSR2_ENABLE); + } + + val = I915_READ(EDP_PSR_CTL(dev_priv->psr.transcoder)); + WARN_ON(val & EDP_PSR_ENABLE); + return; } if (dev_priv->psr.psr2_enabled) { - val = I915_READ(EDP_PSR2_CTL); + val = I915_READ(EDP_PSR2_CTL(dev_priv->psr.transcoder)); WARN_ON(!(val & EDP_PSR2_ENABLE)); - I915_WRITE(EDP_PSR2_CTL, val & ~EDP_PSR2_ENABLE); + val &= ~EDP_PSR2_ENABLE; + I915_WRITE(EDP_PSR2_CTL(dev_priv->psr.transcoder), val); } else { - val = I915_READ(EDP_PSR_CTL); + val = I915_READ(EDP_PSR_CTL(dev_priv->psr.transcoder)); WARN_ON(!(val & EDP_PSR_ENABLE)); - I915_WRITE(EDP_PSR_CTL, val & ~EDP_PSR_ENABLE); + val &= ~EDP_PSR_ENABLE; + I915_WRITE(EDP_PSR_CTL(dev_priv->psr.transcoder), val); } dev_priv->psr.active = false; } @@ -817,10 +842,10 @@ static void intel_psr_disable_locked(struct intel_dp *intel_dp) intel_psr_exit(dev_priv); if (dev_priv->psr.psr2_enabled) { - psr_status = EDP_PSR2_STATUS; + psr_status = EDP_PSR2_STATUS(dev_priv->psr.transcoder); psr_status_mask = EDP_PSR2_STATUS_STATE_MASK; } else { - psr_status = EDP_PSR_STATUS; + psr_status = EDP_PSR_STATUS(dev_priv->psr.transcoder); psr_status_mask = EDP_PSR_STATUS_STATE_MASK; } @@ -963,7 +988,8 @@ int intel_psr_wait_for_idle(const struct intel_crtc_state *new_crtc_state, * defensive enough to cover everything. */ - return __intel_wait_for_register(&dev_priv->uncore, EDP_PSR_STATUS, + return __intel_wait_for_register(&dev_priv->uncore, + EDP_PSR_STATUS(dev_priv->psr.transcoder), EDP_PSR_STATUS_STATE_MASK, EDP_PSR_STATUS_STATE_IDLE, 2, 50, out_value); @@ -979,10 +1005,10 @@ static bool __psr_wait_for_idle_locked(struct drm_i915_private *dev_priv) return false; if (dev_priv->psr.psr2_enabled) { - reg = EDP_PSR2_STATUS; + reg = EDP_PSR2_STATUS(dev_priv->psr.transcoder); mask = EDP_PSR2_STATUS_STATE_MASK; } else { - reg = EDP_PSR_STATUS; + reg = EDP_PSR_STATUS(dev_priv->psr.transcoder); mask = EDP_PSR_STATUS_STATE_MASK; } @@ -1208,36 +1234,24 @@ void intel_psr_flush(struct drm_i915_private *dev_priv, */ void intel_psr_init(struct drm_i915_private *dev_priv) { - u32 val; - if (!HAS_PSR(dev_priv)) return; - dev_priv->psr_mmio_base = IS_HASWELL(dev_priv) ? - HSW_EDP_PSR_BASE : BDW_EDP_PSR_BASE; - if (!dev_priv->psr.sink_support) return; + if (IS_HASWELL(dev_priv)) + /* + * HSW don't have PSR registers on the same space as transcoder + * so set this to a value that when subtract to the register + * in transcoder space results in the right offset for HSW + */ + dev_priv->hsw_psr_mmio_adjust = _SRD_CTL_EDP - _HSW_EDP_PSR_BASE; + if (i915_modparams.enable_psr == -1) if (INTEL_GEN(dev_priv) < 9 || !dev_priv->vbt.psr.enable) i915_modparams.enable_psr = 0; - /* - * If a PSR error happened and the driver is reloaded, the EDP_PSR_IIR - * will still keep the error set even after the reset done in the - * irq_preinstall and irq_uninstall hooks. - * And enabling in this situation cause the screen to freeze in the - * first time that PSR HW tries to activate so lets keep PSR disabled - * to avoid any rendering problems. - */ - val = I915_READ(EDP_PSR_IIR); - val &= EDP_PSR_ERROR(edp_psr_shift(TRANSCODER_EDP)); - if (val) { - DRM_DEBUG_KMS("PSR interruption error set\n"); - dev_priv->psr.sink_not_reliable = true; - } - /* Set link_standby x link_off defaults */ if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) /* HSW and BDW require workarounds that we don't implement. */ diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 25f78196b964d..45a9124e53b6d 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -2796,7 +2796,7 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_D(CHICKEN_PIPESL_1(PIPE_C), D_BDW_PLUS); MMIO_D(WM_MISC, D_BDW); - MMIO_D(_MMIO(BDW_EDP_PSR_BASE), D_BDW); + MMIO_D(_MMIO(_SRD_CTL_EDP), D_BDW); MMIO_D(_MMIO(0x6671c), D_BDW_PLUS); MMIO_D(_MMIO(0x66c00), D_BDW_PLUS); diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index b0f51591f2e4a..e103fcba64352 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2133,7 +2133,7 @@ psr_source_status(struct drm_i915_private *dev_priv, struct seq_file *m) "BUF_ON", "TG_ON" }; - val = I915_READ(EDP_PSR2_STATUS); + val = I915_READ(EDP_PSR2_STATUS(dev_priv->psr.transcoder)); status_val = (val & EDP_PSR2_STATUS_STATE_MASK) >> EDP_PSR2_STATUS_STATE_SHIFT; if (status_val < ARRAY_SIZE(live_status)) @@ -2149,7 +2149,7 @@ psr_source_status(struct drm_i915_private *dev_priv, struct seq_file *m) "SRDOFFACK", "SRDENT_ON", }; - val = I915_READ(EDP_PSR_STATUS); + val = I915_READ(EDP_PSR_STATUS(dev_priv->psr.transcoder)); status_val = (val & EDP_PSR_STATUS_STATE_MASK) >> EDP_PSR_STATUS_STATE_SHIFT; if (status_val < ARRAY_SIZE(live_status)) @@ -2192,10 +2192,10 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) goto unlock; if (psr->psr2_enabled) { - val = I915_READ(EDP_PSR2_CTL); + val = I915_READ(EDP_PSR2_CTL(dev_priv->psr.transcoder)); enabled = val & EDP_PSR2_ENABLE; } else { - val = I915_READ(EDP_PSR_CTL); + val = I915_READ(EDP_PSR_CTL(dev_priv->psr.transcoder)); enabled = val & EDP_PSR_ENABLE; } seq_printf(m, "Source PSR ctl: %s [0x%08x]\n", @@ -2208,7 +2208,8 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) * SKL+ Perf counter is reset to 0 everytime DC state is entered */ if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { - val = I915_READ(EDP_PSR_PERF_CNT) & EDP_PSR_PERF_CNT_MASK; + val = I915_READ(EDP_PSR_PERF_CNT(dev_priv->psr.transcoder)); + val &= EDP_PSR_PERF_CNT_MASK; seq_printf(m, "Performance counter: %u\n", val); } @@ -2226,8 +2227,11 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) * Reading all 3 registers before hand to minimize crossing a * frame boundary between register reads */ - for (frame = 0; frame < PSR2_SU_STATUS_FRAMES; frame += 3) - su_frames_val[frame / 3] = I915_READ(PSR2_SU_STATUS(frame)); + for (frame = 0; frame < PSR2_SU_STATUS_FRAMES; frame += 3) { + val = I915_READ(PSR2_SU_STATUS(dev_priv->psr.transcoder, + frame)); + su_frames_val[frame / 3] = val; + } seq_puts(m, "Frame:\tPSR2 SU blocks:\n"); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1d725e0bba408..048896b1348b9 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -479,6 +479,7 @@ struct i915_psr { bool enabled; struct intel_dp *dp; enum pipe pipe; + enum transcoder transcoder; bool active; struct work_struct work; unsigned busy_frontbuffer_bits; @@ -1330,11 +1331,11 @@ struct drm_i915_private { */ u32 gpio_mmio_base; + u32 hsw_psr_mmio_adjust; + /* MMIO base address for MIPI regs */ u32 mipi_mmio_base; - u32 psr_mmio_base; - u32 pps_mmio_base; wait_queue_head_t gmbus_wait_queue; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 2abd199093c5b..a092b34c269d6 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4186,10 +4186,17 @@ enum { #define PIPESRC(trans) _MMIO_TRANS2(trans, _PIPEASRC) #define PIPE_MULT(trans) _MMIO_TRANS2(trans, _PIPE_MULT_A) -/* HSW+ eDP PSR registers */ -#define HSW_EDP_PSR_BASE 0x64800 -#define BDW_EDP_PSR_BASE 0x6f800 -#define EDP_PSR_CTL _MMIO(dev_priv->psr_mmio_base + 0) +/* + * HSW+ eDP PSR registers + * + * HSW PSR registers are relative to DDIA(_DDI_BUF_CTL_A + 0x800) with just one + * instance of it + */ +#define _HSW_EDP_PSR_BASE 0x64800 +#define _SRD_CTL_A 0x60800 +#define _SRD_CTL_EDP 0x6f800 +#define _PSR_ADJ(tran, reg) (_TRANS2(tran, reg) - dev_priv->hsw_psr_mmio_adjust) +#define EDP_PSR_CTL(tran) _MMIO(_PSR_ADJ(tran, _SRD_CTL_A)) #define EDP_PSR_ENABLE (1 << 31) #define BDW_PSR_SINGLE_FRAME (1 << 30) #define EDP_PSR_RESTORE_PSR_ACTIVE_CTX_MASK (1 << 29) /* SW can't modify */ @@ -4226,16 +4233,22 @@ enum { #define EDP_PSR_TRANSCODER_A_SHIFT 8 #define EDP_PSR_TRANSCODER_EDP_SHIFT 0 -#define EDP_PSR_AUX_CTL _MMIO(dev_priv->psr_mmio_base + 0x10) +#define _SRD_AUX_CTL_A 0x60810 +#define _SRD_AUX_CTL_EDP 0x6f810 +#define EDP_PSR_AUX_CTL(tran) _MMIO(_PSR_ADJ(tran, _SRD_AUX_CTL_A)) #define EDP_PSR_AUX_CTL_TIME_OUT_MASK (3 << 26) #define EDP_PSR_AUX_CTL_MESSAGE_SIZE_MASK (0x1f << 20) #define EDP_PSR_AUX_CTL_PRECHARGE_2US_MASK (0xf << 16) #define EDP_PSR_AUX_CTL_ERROR_INTERRUPT (1 << 11) #define EDP_PSR_AUX_CTL_BIT_CLOCK_2X_MASK (0x7ff) -#define EDP_PSR_AUX_DATA(i) _MMIO(dev_priv->psr_mmio_base + 0x14 + (i) * 4) /* 5 registers */ +#define _SRD_AUX_DATA_A 0x60814 +#define _SRD_AUX_DATA_EDP 0x6f814 +#define EDP_PSR_AUX_DATA(tran, i) _MMIO(_PSR_ADJ(tran, _SRD_AUX_DATA_A) + (i) + 4) /* 5 registers */ -#define EDP_PSR_STATUS _MMIO(dev_priv->psr_mmio_base + 0x40) +#define _SRD_STATUS_A 0x60840 +#define _SRD_STATUS_EDP 0x6f840 +#define EDP_PSR_STATUS(tran) _MMIO(_PSR_ADJ(tran, _SRD_STATUS_A)) #define EDP_PSR_STATUS_STATE_MASK (7 << 29) #define EDP_PSR_STATUS_STATE_SHIFT 29 #define EDP_PSR_STATUS_STATE_IDLE (0 << 29) @@ -4260,10 +4273,15 @@ enum { #define EDP_PSR_STATUS_SENDING_TP1 (1 << 4) #define EDP_PSR_STATUS_IDLE_MASK 0xf -#define EDP_PSR_PERF_CNT _MMIO(dev_priv->psr_mmio_base + 0x44) +#define _SRD_PERF_CNT_A 0x60844 +#define _SRD_PERF_CNT_EDP 0x6f844 +#define EDP_PSR_PERF_CNT(tran) _MMIO(_PSR_ADJ(tran, _SRD_PERF_CNT_A)) #define EDP_PSR_PERF_CNT_MASK 0xffffff -#define EDP_PSR_DEBUG _MMIO(dev_priv->psr_mmio_base + 0x60) /* PSR_MASK on SKL+ */ +/* PSR_MASK on SKL+ */ +#define _SRD_DEBUG_A 0x60860 +#define _SRD_DEBUG_EDP 0x6f860 +#define EDP_PSR_DEBUG(tran) _MMIO(_PSR_ADJ(tran, _SRD_DEBUG_A)) #define EDP_PSR_DEBUG_MASK_MAX_SLEEP (1 << 28) #define EDP_PSR_DEBUG_MASK_LPSP (1 << 27) #define EDP_PSR_DEBUG_MASK_MEMUP (1 << 26) @@ -4271,7 +4289,9 @@ enum { #define EDP_PSR_DEBUG_MASK_DISP_REG_WRITE (1 << 16) /* Reserved in ICL+ */ #define EDP_PSR_DEBUG_EXIT_ON_PIXEL_UNDERRUN (1 << 15) /* SKL+ */ -#define EDP_PSR2_CTL _MMIO(0x6f900) +#define _PSR2_CTL_A 0x60900 +#define _PSR2_CTL_EDP 0x6f900 +#define EDP_PSR2_CTL(tran) _MMIO_TRANS2(tran, _PSR2_CTL_A) #define EDP_PSR2_ENABLE (1 << 31) #define EDP_SU_TRACK_ENABLE (1 << 30) #define EDP_Y_COORDINATE_VALID (1 << 26) /* GLK and CNL+ */ @@ -4293,8 +4313,8 @@ enum { #define _PSR_EVENT_TRANS_B 0x61848 #define _PSR_EVENT_TRANS_C 0x62848 #define _PSR_EVENT_TRANS_D 0x63848 -#define _PSR_EVENT_TRANS_EDP 0x6F848 -#define PSR_EVENT(trans) _MMIO_TRANS2(trans, _PSR_EVENT_TRANS_A) +#define _PSR_EVENT_TRANS_EDP 0x6f848 +#define PSR_EVENT(tran) _MMIO_TRANS2(tran, _PSR_EVENT_TRANS_A) #define PSR_EVENT_PSR2_WD_TIMER_EXPIRE (1 << 17) #define PSR_EVENT_PSR2_DISABLED (1 << 16) #define PSR_EVENT_SU_DIRTY_FIFO_UNDERRUN (1 << 15) @@ -4312,15 +4332,16 @@ enum { #define PSR_EVENT_LPSP_MODE_EXIT (1 << 1) #define PSR_EVENT_PSR_DISABLE (1 << 0) -#define EDP_PSR2_STATUS _MMIO(0x6f940) +#define _PSR2_STATUS_A 0x60940 +#define _PSR2_STATUS_EDP 0x6f940 +#define EDP_PSR2_STATUS(tran) _MMIO_TRANS2(tran, _PSR2_STATUS_A) #define EDP_PSR2_STATUS_STATE_MASK (0xf << 28) #define EDP_PSR2_STATUS_STATE_SHIFT 28 -#define _PSR2_SU_STATUS_0 0x6F914 -#define _PSR2_SU_STATUS_1 0x6F918 -#define _PSR2_SU_STATUS_2 0x6F91C -#define _PSR2_SU_STATUS(index) _MMIO(_PICK_EVEN((index), _PSR2_SU_STATUS_0, _PSR2_SU_STATUS_1)) -#define PSR2_SU_STATUS(frame) (_PSR2_SU_STATUS((frame) / 3)) +#define _PSR2_SU_STATUS_A 0x60914 +#define _PSR2_SU_STATUS_EDP 0x6f914 +#define _PSR2_SU_STATUS(tran, index) _MMIO(_TRANS2(tran, _PSR2_SU_STATUS_A) + (index) * 4) +#define PSR2_SU_STATUS(tran, frame) (_PSR2_SU_STATUS(tran, (frame) / 3)) #define PSR2_SU_STATUS_SHIFT(frame) (((frame) % 3) * 10) #define PSR2_SU_STATUS_MASK(frame) (0x3ff << PSR2_SU_STATUS_SHIFT(frame)) #define PSR2_SU_STATUS_FRAMES 8 From 99fc38b12095cd0ca9fc5b8c8e27e827dff6ed34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Tue, 20 Aug 2019 15:33:24 -0700 Subject: [PATCH 002/331] drm/i915: Add transcoder restriction to PSR2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to PSR2_CTL definition in BSpec there is only one instance of PSR2_CTL. Platforms gen < 12 with EDP transcoder only support PSR2 on TRANSCODER_EDP while on TGL PSR2 is only supported by TRANSCODER_A. Since BDW PSR is allowed on any port, but we need to restrict by transcoder. v8: Renamed _psr2_supported_in_trans() to psr2_supported() (Lucas) v9: Renamed psr2_supported() to transcoder_has_psr2() (Ville) BSpec: 7713 BSpec: 20584 Cc: Dhinakaran Pandiyan Cc: Rodrigo Vivi Reviewed-by: Lucas De Marchi Signed-off-by: José Roberto de Souza Signed-off-by: Lucas De Marchi Reviewed-by: Anshuman Gupta Link: https://patchwork.freedesktop.org/patch/msgid/20190820223325.27490-2-jose.souza@intel.com --- drivers/gpu/drm/i915/display/intel_psr.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 77232f6bca176..771d9a40bf12f 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -534,6 +534,15 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp) I915_WRITE(EDP_PSR2_CTL(dev_priv->psr.transcoder), val); } +static bool +transcoder_has_psr2(struct drm_i915_private *dev_priv, enum transcoder trans) +{ + if (INTEL_GEN(dev_priv) >= 12) + return trans == TRANSCODER_A; + else + return trans == TRANSCODER_EDP; +} + static bool intel_psr2_config_valid(struct intel_dp *intel_dp, struct intel_crtc_state *crtc_state) { @@ -545,6 +554,12 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, if (!dev_priv->psr.sink_psr2_support) return false; + if (!transcoder_has_psr2(dev_priv, crtc_state->cpu_transcoder)) { + DRM_DEBUG_KMS("PSR2 not supported in transcoder %s\n", + transcoder_name(crtc_state->cpu_transcoder)); + return false; + } + /* * DSC and PSR2 cannot be enabled simultaneously. If a requested * resolution requires DSC to be enabled, priority is given to DSC From df7415bfc06f8ee482a510aea39c12121e409dcc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Tue, 20 Aug 2019 15:33:25 -0700 Subject: [PATCH 003/331] drm/i915: Do not unmask PSR interruption in IRQ postinstall MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No need to unmask PSR interrutpion if PSR is not enabled, better move the call to intel_psr_enable_source(). v2: Renamed intel_psr_irq_control() to psr_irq_control() (Lucas) Cc: Rodrigo Vivi Cc: Dhinakaran Pandiyan Signed-off-by: José Roberto de Souza Signed-off-by: Lucas De Marchi Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20190820223325.27490-3-jose.souza@intel.com --- drivers/gpu/drm/i915/display/intel_psr.c | 6 ++++-- drivers/gpu/drm/i915/display/intel_psr.h | 1 - drivers/gpu/drm/i915/i915_irq.c | 2 -- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 771d9a40bf12f..28b62e5872044 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -105,7 +105,7 @@ static int edp_psr_shift(enum transcoder cpu_transcoder) } } -void intel_psr_irq_control(struct drm_i915_private *dev_priv, u32 debug) +static void psr_irq_control(struct drm_i915_private *dev_priv, u32 debug) { u32 debug_mask, mask; enum transcoder cpu_transcoder; @@ -736,6 +736,8 @@ static void intel_psr_enable_source(struct intel_dp *intel_dp, mask |= EDP_PSR_DEBUG_MASK_DISP_REG_WRITE; I915_WRITE(EDP_PSR_DEBUG(dev_priv->psr.transcoder), mask); + + psr_irq_control(dev_priv, dev_priv->psr.debug); } static void intel_psr_enable_locked(struct drm_i915_private *dev_priv, @@ -1108,7 +1110,7 @@ int intel_psr_debug_set(struct drm_i915_private *dev_priv, u64 val) old_mode = dev_priv->psr.debug & I915_PSR_DEBUG_MODE_MASK; dev_priv->psr.debug = val; - intel_psr_irq_control(dev_priv, dev_priv->psr.debug); + psr_irq_control(dev_priv, dev_priv->psr.debug); mutex_unlock(&dev_priv->psr.lock); diff --git a/drivers/gpu/drm/i915/display/intel_psr.h b/drivers/gpu/drm/i915/display/intel_psr.h index dc818826f36dc..46e4de8b8cd5e 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.h +++ b/drivers/gpu/drm/i915/display/intel_psr.h @@ -30,7 +30,6 @@ void intel_psr_flush(struct drm_i915_private *dev_priv, void intel_psr_init(struct drm_i915_private *dev_priv); void intel_psr_compute_config(struct intel_dp *intel_dp, struct intel_crtc_state *crtc_state); -void intel_psr_irq_control(struct drm_i915_private *dev_priv, u32 debug); void intel_psr_irq_handler(struct drm_i915_private *dev_priv, u32 psr_iir); void intel_psr_short_pulse(struct intel_dp *intel_dp); int intel_psr_wait_for_idle(const struct intel_crtc_state *new_crtc_state, diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 37e3dd3c1a9dc..77391d8325bf3 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3684,7 +3684,6 @@ static void ironlake_irq_postinstall(struct drm_i915_private *dev_priv) if (IS_HASWELL(dev_priv)) { gen3_assert_iir_is_zero(uncore, EDP_PSR_IIR); - intel_psr_irq_control(dev_priv, dev_priv->psr.debug); display_mask |= DE_EDP_PSR_INT_HSW; } @@ -3795,7 +3794,6 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) de_port_enables |= GEN8_PORT_DP_A_HOTPLUG; gen3_assert_iir_is_zero(uncore, EDP_PSR_IIR); - intel_psr_irq_control(dev_priv, dev_priv->psr.debug); for_each_pipe(dev_priv, pipe) { dev_priv->de_irq_mask[pipe] = ~de_pipe_masked; From cee508a0bddba1627575de2e41fb8e321fa380a8 Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Tue, 20 Aug 2019 15:30:59 -0700 Subject: [PATCH 004/331] drm/dp/dsc: Add Support for all BPCs supported by TGL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DSC engine on ICL supports only 8 and 10 BPC as the input BPC. But DSC engine in TGL supports 8, 10 and 12 BPC. Add 12 BPC support for DSC while calculating compression configuration. v2: Remove the separate define TGL_DP_DSC_MAX_SUPPORTED_BPC and use the value directly.(More such defines can be removed as part of future patches). (Ville) v3: Use values directly instead of accessing the defines everytime for min and max DSC BPC. Cc: Ville Syrjälä Cc: Manasi Navare Signed-off-by: Anusha Srivatsa Reviewed-by: Manasi Navare Signed-off-by: Manasi Navare Link: https://patchwork.freedesktop.org/patch/msgid/20190820223059.18052-1-anusha.srivatsa@intel.com --- drivers/gpu/drm/i915/display/intel_dp.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 921ad0a2f7ba7..23908da1cd5da 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -70,8 +70,6 @@ /* DP DSC small joiner has 2 FIFOs each of 640 x 6 bytes */ #define DP_DSC_MAX_SMALL_JOINER_RAM_BUFFER 61440 -#define DP_DSC_MIN_SUPPORTED_BPC 8 -#define DP_DSC_MAX_SUPPORTED_BPC 10 /* DP DSC throughput values used for slice count calculations KPixels/s */ #define DP_DSC_PEAK_PIXEL_RATE 2720000 @@ -1915,11 +1913,17 @@ static int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, if (!intel_dp_supports_dsc(intel_dp, pipe_config)) return -EINVAL; - dsc_max_bpc = min_t(u8, DP_DSC_MAX_SUPPORTED_BPC, - conn_state->max_requested_bpc); + /* Max DSC Input BPC for ICL is 10 and for TGL+ is 12 */ + if (INTEL_GEN(dev_priv) >= 12) + dsc_max_bpc = min_t(u8, 12, conn_state->max_requested_bpc); + else + dsc_max_bpc = min_t(u8, 10, + conn_state->max_requested_bpc); pipe_bpp = intel_dp_dsc_compute_bpp(intel_dp, dsc_max_bpc); - if (pipe_bpp < DP_DSC_MIN_SUPPORTED_BPC * 3) { + + /* Min Input BPC for ICL+ is 8 */ + if (pipe_bpp < 8 * 3) { DRM_DEBUG_KMS("No DSC support for less than 8bpc\n"); return -EINVAL; } From d4c61c4a16decd8ace8660f22c81609a539fccba Mon Sep 17 00:00:00 2001 From: Manasi Navare Date: Wed, 21 Aug 2019 14:59:50 -0700 Subject: [PATCH 005/331] drm/i915/dp: Fix DSC enable code to use cpu_transcoder instead of encoder->type This patch fixes the intel_configure_pps_for_dsc_encoder() function to use cpu_transcoder instead of encoder->type to select the correct DSC registers that was wrongly used in the original patch for one DSC register isntance. Fixes: 7182414e2530 ("drm/i915/dp: Configure i915 Picture parameter Set registers during DSC enabling") Cc: Ville Syrjala Cc: Maarten Lankhorst Cc: Jani Nikula Cc: Ville Syrjala Cc: # v5.0+ Signed-off-by: Manasi Navare Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20190821215950.24223-1-manasi.d.navare@intel.com --- drivers/gpu/drm/i915/display/intel_vdsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.c b/drivers/gpu/drm/i915/display/intel_vdsc.c index 598ddb60f9fb5..d4fb7f16f9f6c 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc.c +++ b/drivers/gpu/drm/i915/display/intel_vdsc.c @@ -547,7 +547,7 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, pps_val |= DSC_PIC_HEIGHT(vdsc_cfg->pic_height) | DSC_PIC_WIDTH(vdsc_cfg->pic_width / num_vdsc_instances); DRM_INFO("PPS2 = 0x%08x\n", pps_val); - if (encoder->type == INTEL_OUTPUT_EDP) { + if (cpu_transcoder == TRANSCODER_EDP) { I915_WRITE(DSCA_PICTURE_PARAMETER_SET_2, pps_val); /* * If 2 VDSC instances are needed, configure PPS for second From b3c0692f36a44bd02ee8ccc5bed972a36fbf2e99 Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Fri, 23 Aug 2019 01:20:33 -0700 Subject: [PATCH 006/331] drm/i915/tgl: Move GTCR register to cope with GAM MMIO address remap GAM registers located in the 0x4xxx range have been relocated to 0xCxxx; this is to make space for global MOCS registers. v2: Rename register and bitfield to its new name (suggested by Mika) HSD: 399379 Cc: Daniele Ceraolo Spurio Signed-off-by: Michel Thierry Reviewed-by: Lucas De Marchi Reviewed-by: Mika Kuoppala Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20190823082055.5992-2-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h | 3 +++ drivers/gpu/drm/i915/i915_gem_gtt.c | 8 +++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h index edf194d23c6b1..1949346e714ee 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h @@ -83,6 +83,9 @@ #define GEN8_GTCR _MMIO(0x4274) #define GEN8_GTCR_INVALIDATE (1<<0) +#define GEN12_GUC_TLB_INV_CR _MMIO(0xcee8) +#define GEN12_GUC_TLB_INV_CR_INVALIDATE (1 << 0) + #define GUC_ARAT_C6DIS _MMIO(0xA178) #define GUC_SHIM_CONTROL _MMIO(0xc064) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 0b81e0b643930..2a425db1cfd89 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -132,9 +132,15 @@ static void gen6_ggtt_invalidate(struct i915_ggtt *ggtt) static void guc_ggtt_invalidate(struct i915_ggtt *ggtt) { struct intel_uncore *uncore = ggtt->vm.gt->uncore; + struct drm_i915_private *i915 = ggtt->vm.i915; gen6_ggtt_invalidate(ggtt); - intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE); + + if (INTEL_GEN(i915) >= 12) + intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR, + GEN12_GUC_TLB_INV_CR_INVALIDATE); + else + intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE); } static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt) From 5d86923060fce88a4dda8d8c9c5d5eb32f37c37b Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Fri, 23 Aug 2019 01:20:34 -0700 Subject: [PATCH 007/331] drm/i915/tgl: Enable VD HCP/MFX sub-pipe power gating HCP/MFX power gating is disabled by default, turn it on for the vd units available. User space will also issue a MI_FORCE_WAKEUP properly to wake up proper subwell. During driver load, init_clock_gating happens after device_info_init_mmio read the vdbox disable fuse register, so only present vd units will have these enabled. BSpec: 14214 HSDES: 1209977827 Signed-off-by: Michel Thierry Reviewed-by: Lucas De Marchi Signed-off-by: Lucas De Marchi Reviewed-by: Tony Ye Link: https://patchwork.freedesktop.org/patch/msgid/20190823082055.5992-3-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 4 ++++ drivers/gpu/drm/i915/intel_pm.c | 18 +++++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index a092b34c269d6..02e1ef10c47e7 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -8615,6 +8615,10 @@ enum { #define GEN9_PWRGT_MEDIA_STATUS_MASK (1 << 0) #define GEN9_PWRGT_RENDER_STATUS_MASK (1 << 1) +#define POWERGATE_ENABLE _MMIO(0xa210) +#define VDN_HCP_POWERGATE_ENABLE(n) BIT(((n) * 2) + 3) +#define VDN_MFX_POWERGATE_ENABLE(n) BIT(((n) * 2) + 4) + #define GTFIFODBG _MMIO(0x120000) #define GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV (0x1f << 20) #define GT_FIFO_FREE_ENTRIES_CHV (0x7f << 13) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 75ee027abb80e..d3ea193cd0939 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -9078,6 +9078,22 @@ static void icl_init_clock_gating(struct drm_i915_private *dev_priv) _MASKED_BIT_ENABLE(GEN11_ENABLE_32_PLANE_MODE)); } +static void tgl_init_clock_gating(struct drm_i915_private *dev_priv) +{ + u32 vd_pg_enable = 0; + unsigned int i; + + /* This is not a WA. Enable VD HCP & MFX_ENC powergate */ + for (i = 0; i < I915_MAX_VCS; i++) { + if (HAS_ENGINE(dev_priv, _VCS(i))) + vd_pg_enable |= VDN_HCP_POWERGATE_ENABLE(i) | + VDN_MFX_POWERGATE_ENABLE(i); + } + + I915_WRITE(POWERGATE_ENABLE, + I915_READ(POWERGATE_ENABLE) | vd_pg_enable); +} + static void cnp_init_clock_gating(struct drm_i915_private *dev_priv) { if (!HAS_PCH_CNP(dev_priv)) @@ -9598,7 +9614,7 @@ static void nop_init_clock_gating(struct drm_i915_private *dev_priv) void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv) { if (IS_GEN(dev_priv, 12)) - dev_priv->display.init_clock_gating = nop_init_clock_gating; + dev_priv->display.init_clock_gating = tgl_init_clock_gating; else if (IS_GEN(dev_priv, 11)) dev_priv->display.init_clock_gating = icl_init_clock_gating; else if (IS_CANNONLAKE(dev_priv)) From 4087f873df1fbf9d9590953119ea78dde6a759dd Mon Sep 17 00:00:00 2001 From: Stuart Summers Date: Fri, 23 Aug 2019 09:02:57 -0700 Subject: [PATCH 008/331] drm/i915: Use variable for debugfs device status Use a local variable to find SSEU runtime information in various debugfs functions. v2: Remove extra line breaks per feedback from Chris Signed-off-by: Stuart Summers Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190823160307.180813-2-stuart.summers@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index e103fcba64352..806db87affb2f 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3849,8 +3849,7 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, sseu->slice_mask |= BIT(s); if (IS_GEN9_BC(dev_priv)) - sseu->subslice_mask[s] = - RUNTIME_INFO(dev_priv)->sseu.subslice_mask[s]; + sseu->subslice_mask[s] = info->sseu.subslice_mask[s]; for (ss = 0; ss < info->sseu.max_subslices; ss++) { unsigned int eu_cnt; @@ -3877,25 +3876,22 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, static void broadwell_sseu_device_status(struct drm_i915_private *dev_priv, struct sseu_dev_info *sseu) { + const struct intel_runtime_info *info = RUNTIME_INFO(dev_priv); u32 slice_info = I915_READ(GEN8_GT_SLICE_INFO); int s; sseu->slice_mask = slice_info & GEN8_LSLICESTAT_MASK; if (sseu->slice_mask) { - sseu->eu_per_subslice = - RUNTIME_INFO(dev_priv)->sseu.eu_per_subslice; - for (s = 0; s < fls(sseu->slice_mask); s++) { - sseu->subslice_mask[s] = - RUNTIME_INFO(dev_priv)->sseu.subslice_mask[s]; - } + sseu->eu_per_subslice = info->sseu.eu_per_subslice; + for (s = 0; s < fls(sseu->slice_mask); s++) + sseu->subslice_mask[s] = info->sseu.subslice_mask[s]; sseu->eu_total = sseu->eu_per_subslice * intel_sseu_subslice_total(sseu); /* subtract fused off EU(s) from enabled slice(s) */ for (s = 0; s < fls(sseu->slice_mask); s++) { - u8 subslice_7eu = - RUNTIME_INFO(dev_priv)->sseu.subslice_7eu[s]; + u8 subslice_7eu = info->sseu.subslice_7eu[s]; sseu->eu_total -= hweight8(subslice_7eu); } @@ -3942,6 +3938,7 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info, static int i915_sseu_status(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); + const struct intel_runtime_info *info = RUNTIME_INFO(dev_priv); struct sseu_dev_info sseu; intel_wakeref_t wakeref; @@ -3949,14 +3946,13 @@ static int i915_sseu_status(struct seq_file *m, void *unused) return -ENODEV; seq_puts(m, "SSEU Device Info\n"); - i915_print_sseu_info(m, true, &RUNTIME_INFO(dev_priv)->sseu); + i915_print_sseu_info(m, true, &info->sseu); seq_puts(m, "SSEU Device Status\n"); memset(&sseu, 0, sizeof(sseu)); - sseu.max_slices = RUNTIME_INFO(dev_priv)->sseu.max_slices; - sseu.max_subslices = RUNTIME_INFO(dev_priv)->sseu.max_subslices; - sseu.max_eus_per_subslice = - RUNTIME_INFO(dev_priv)->sseu.max_eus_per_subslice; + sseu.max_slices = info->sseu.max_slices; + sseu.max_subslices = info->sseu.max_subslices; + sseu.max_eus_per_subslice = info->sseu.max_eus_per_subslice; with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) { if (IS_CHERRYVIEW(dev_priv)) From 8b355db99cfbdaea848e05c288d6d80c391f0b13 Mon Sep 17 00:00:00 2001 From: Stuart Summers Date: Fri, 23 Aug 2019 09:02:58 -0700 Subject: [PATCH 009/331] drm/i915: Add function to set SSEU info per platform Add a new function to allow each platform to set maximum slice, subslice, and EU information to reduce code duplication. Signed-off-by: Stuart Summers Reviewed-by: Mika Kuoppala Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190823160307.180813-3-stuart.summers@intel.com --- drivers/gpu/drm/i915/gt/intel_sseu.c | 8 +++++ drivers/gpu/drm/i915/gt/intel_sseu.h | 3 ++ drivers/gpu/drm/i915/i915_debugfs.c | 6 ++-- drivers/gpu/drm/i915/intel_device_info.c | 39 +++++++++--------------- 4 files changed, 28 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index 6bf2d87da109b..6727079eb9b65 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -8,6 +8,14 @@ #include "intel_lrc_reg.h" #include "intel_sseu.h" +void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices, + u8 max_subslices, u8 max_eus_per_subslice) +{ + sseu->max_slices = max_slices; + sseu->max_subslices = max_subslices; + sseu->max_eus_per_subslice = max_eus_per_subslice; +} + unsigned int intel_sseu_subslice_total(const struct sseu_dev_info *sseu) { diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index b50d0401a4e21..64e47dad07be3 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -63,6 +63,9 @@ intel_sseu_from_device_info(const struct sseu_dev_info *sseu) return value; } +void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices, + u8 max_subslices, u8 max_eus_per_subslice); + unsigned int intel_sseu_subslice_total(const struct sseu_dev_info *sseu); diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 806db87affb2f..28713c9c98eab 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3950,9 +3950,9 @@ static int i915_sseu_status(struct seq_file *m, void *unused) seq_puts(m, "SSEU Device Status\n"); memset(&sseu, 0, sizeof(sseu)); - sseu.max_slices = info->sseu.max_slices; - sseu.max_subslices = info->sseu.max_subslices; - sseu.max_eus_per_subslice = info->sseu.max_eus_per_subslice; + intel_sseu_set_info(&sseu, info->sseu.max_slices, + info->sseu.max_subslices, + info->sseu.max_eus_per_subslice); with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) { if (IS_CHERRYVIEW(dev_priv)) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index d0ed44d334842..77d7bbaa49f31 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -191,15 +191,10 @@ static void gen11_sseu_info_init(struct drm_i915_private *dev_priv) u8 eu_en; int s; - if (IS_ELKHARTLAKE(dev_priv)) { - sseu->max_slices = 1; - sseu->max_subslices = 4; - sseu->max_eus_per_subslice = 8; - } else { - sseu->max_slices = 1; - sseu->max_subslices = 8; - sseu->max_eus_per_subslice = 8; - } + if (IS_ELKHARTLAKE(dev_priv)) + intel_sseu_set_info(sseu, 1, 4, 8); + else + intel_sseu_set_info(sseu, 1, 8, 8); s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK; ss_en = ~I915_READ(GEN11_GT_SUBSLICE_DISABLE); @@ -236,11 +231,10 @@ static void gen10_sseu_info_init(struct drm_i915_private *dev_priv) const int eu_mask = 0xff; u32 subslice_mask, eu_en; + intel_sseu_set_info(sseu, 6, 4, 8); + sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >> GEN10_F2_S_ENA_SHIFT; - sseu->max_slices = 6; - sseu->max_subslices = 4; - sseu->max_eus_per_subslice = 8; subslice_mask = (1 << 4) - 1; subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >> @@ -314,9 +308,7 @@ static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv) fuse = I915_READ(CHV_FUSE_GT); sseu->slice_mask = BIT(0); - sseu->max_slices = 1; - sseu->max_subslices = 2; - sseu->max_eus_per_subslice = 8; + intel_sseu_set_info(sseu, 1, 2, 8); if (!(fuse & CHV_FGT_DISABLE_SS0)) { u8 disabled_mask = @@ -372,9 +364,8 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT; /* BXT has a single slice and at most 3 subslices. */ - sseu->max_slices = IS_GEN9_LP(dev_priv) ? 1 : 3; - sseu->max_subslices = IS_GEN9_LP(dev_priv) ? 3 : 4; - sseu->max_eus_per_subslice = 8; + intel_sseu_set_info(sseu, IS_GEN9_LP(dev_priv) ? 1 : 3, + IS_GEN9_LP(dev_priv) ? 3 : 4, 8); /* * The subslice disable field is global, i.e. it applies @@ -473,9 +464,7 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) fuse2 = I915_READ(GEN8_FUSE2); sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT; - sseu->max_slices = 3; - sseu->max_subslices = 3; - sseu->max_eus_per_subslice = 8; + intel_sseu_set_info(sseu, 3, 3, 8); /* * The subslice disable field is global, i.e. it applies @@ -577,9 +566,6 @@ static void haswell_sseu_info_init(struct drm_i915_private *dev_priv) break; } - sseu->max_slices = hweight8(sseu->slice_mask); - sseu->max_subslices = hweight8(sseu->subslice_mask[0]); - fuse1 = I915_READ(HSW_PAVP_FUSE1); switch ((fuse1 & HSW_F1_EU_DIS_MASK) >> HSW_F1_EU_DIS_SHIFT) { default: @@ -596,7 +582,10 @@ static void haswell_sseu_info_init(struct drm_i915_private *dev_priv) sseu->eu_per_subslice = 6; break; } - sseu->max_eus_per_subslice = sseu->eu_per_subslice; + + intel_sseu_set_info(sseu, hweight8(sseu->slice_mask), + hweight8(sseu->subslice_mask[0]), + sseu->eu_per_subslice); for (s = 0; s < sseu->max_slices; s++) { for (ss = 0; ss < sseu->max_subslices; ss++) { From 7a200aad1127e98cbf4da9685f652899598fe357 Mon Sep 17 00:00:00 2001 From: Stuart Summers Date: Fri, 23 Aug 2019 09:02:59 -0700 Subject: [PATCH 010/331] drm/i915: Add subslice stride runtime parameter Add a new parameter, ss_stride, to the runtime info structure. This is used to mirror the userspace concept of subslice stride, which is a range of subslices per slice. This patch simply adds the definition and updates usage in the QUERY_TOPOLOGY_INFO handler. v2: Add GEM_BUG_ON to make sure ss_stride is valid Signed-off-by: Stuart Summers Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190823160307.180813-4-stuart.summers@intel.com --- drivers/gpu/drm/i915/gt/intel_sseu.c | 3 +++ drivers/gpu/drm/i915/gt/intel_sseu.h | 3 +++ drivers/gpu/drm/i915/i915_query.c | 5 ++--- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index 6727079eb9b65..edf39ae132c3e 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -14,6 +14,9 @@ void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices, sseu->max_slices = max_slices; sseu->max_subslices = max_subslices; sseu->max_eus_per_subslice = max_eus_per_subslice; + + sseu->ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices); + GEM_BUG_ON(sseu->ss_stride > GEN_MAX_SUBSLICE_STRIDE); } unsigned int diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index 64e47dad07be3..8b8b562ff7732 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -15,6 +15,7 @@ struct drm_i915_private; #define GEN_MAX_SLICES (6) /* CNL upper bound */ #define GEN_MAX_SUBSLICES (8) /* ICL upper bound */ #define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE) +#define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES) struct sseu_dev_info { u8 slice_mask; @@ -33,6 +34,8 @@ struct sseu_dev_info { u8 max_subslices; u8 max_eus_per_subslice; + u8 ss_stride; + /* We don't have more than 8 eus per subslice at the moment and as we * store eus enabled using bits, no need to multiply by eus per * subslice. diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c index ad9240a0817aa..d8e25dcf5f0b7 100644 --- a/drivers/gpu/drm/i915/i915_query.c +++ b/drivers/gpu/drm/i915/i915_query.c @@ -37,7 +37,6 @@ static int query_topology_info(struct drm_i915_private *dev_priv, const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; struct drm_i915_query_topology_info topo; u32 slice_length, subslice_length, eu_length, total_length; - u8 subslice_stride = GEN_SSEU_STRIDE(sseu->max_subslices); u8 eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice); int ret; @@ -50,7 +49,7 @@ static int query_topology_info(struct drm_i915_private *dev_priv, BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask)); slice_length = sizeof(sseu->slice_mask); - subslice_length = sseu->max_slices * subslice_stride; + subslice_length = sseu->max_slices * sseu->ss_stride; eu_length = sseu->max_slices * sseu->max_subslices * eu_stride; total_length = sizeof(topo) + slice_length + subslice_length + eu_length; @@ -69,7 +68,7 @@ static int query_topology_info(struct drm_i915_private *dev_priv, topo.max_eus_per_subslice = sseu->max_eus_per_subslice; topo.subslice_offset = slice_length; - topo.subslice_stride = subslice_stride; + topo.subslice_stride = sseu->ss_stride; topo.eu_offset = slice_length + subslice_length; topo.eu_stride = eu_stride; From 49610c377be7f32d948f0970705b4ad015923aa0 Mon Sep 17 00:00:00 2001 From: Stuart Summers Date: Fri, 23 Aug 2019 09:03:00 -0700 Subject: [PATCH 011/331] drm/i915: Add EU stride runtime parameter Add a new SSEU runtime parameter, eu_stride, which is used to mirror the userspace concept of a range of EUs per subslice. This patch simply adds the parameter and updates usage in the QUERY_TOPOLOGY_INFO handler. v2: Add GEM_BUG_ON to make sure eu_stride is valid Signed-off-by: Stuart Summers Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190823160307.180813-5-stuart.summers@intel.com --- drivers/gpu/drm/i915/gt/intel_sseu.c | 2 ++ drivers/gpu/drm/i915/gt/intel_sseu.h | 3 +++ drivers/gpu/drm/i915/i915_query.c | 5 ++--- drivers/gpu/drm/i915/intel_device_info.c | 9 ++++----- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index edf39ae132c3e..d52686a1afdcb 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -17,6 +17,8 @@ void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices, sseu->ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices); GEM_BUG_ON(sseu->ss_stride > GEN_MAX_SUBSLICE_STRIDE); + sseu->eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice); + GEM_BUG_ON(sseu->eu_stride > GEN_MAX_EU_STRIDE); } unsigned int diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index 8b8b562ff7732..7f2355ce963d9 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -16,6 +16,8 @@ struct drm_i915_private; #define GEN_MAX_SUBSLICES (8) /* ICL upper bound */ #define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE) #define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES) +#define GEN_MAX_EUS (10) /* HSW upper bound */ +#define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS) struct sseu_dev_info { u8 slice_mask; @@ -35,6 +37,7 @@ struct sseu_dev_info { u8 max_eus_per_subslice; u8 ss_stride; + u8 eu_stride; /* We don't have more than 8 eus per subslice at the moment and as we * store eus enabled using bits, no need to multiply by eus per diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c index d8e25dcf5f0b7..abac5042da2b6 100644 --- a/drivers/gpu/drm/i915/i915_query.c +++ b/drivers/gpu/drm/i915/i915_query.c @@ -37,7 +37,6 @@ static int query_topology_info(struct drm_i915_private *dev_priv, const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; struct drm_i915_query_topology_info topo; u32 slice_length, subslice_length, eu_length, total_length; - u8 eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice); int ret; if (query_item->flags != 0) @@ -50,7 +49,7 @@ static int query_topology_info(struct drm_i915_private *dev_priv, slice_length = sizeof(sseu->slice_mask); subslice_length = sseu->max_slices * sseu->ss_stride; - eu_length = sseu->max_slices * sseu->max_subslices * eu_stride; + eu_length = sseu->max_slices * sseu->max_subslices * sseu->eu_stride; total_length = sizeof(topo) + slice_length + subslice_length + eu_length; @@ -70,7 +69,7 @@ static int query_topology_info(struct drm_i915_private *dev_priv, topo.subslice_offset = slice_length; topo.subslice_stride = sseu->ss_stride; topo.eu_offset = slice_length + subslice_length; - topo.eu_stride = eu_stride; + topo.eu_stride = sseu->eu_stride; if (__copy_to_user(u64_to_user_ptr(query_item->data_ptr), &topo, sizeof(topo))) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 77d7bbaa49f31..b1a79ed408ebb 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -118,10 +118,9 @@ void intel_device_info_dump_runtime(const struct intel_runtime_info *info, static int sseu_eu_idx(const struct sseu_dev_info *sseu, int slice, int subslice) { - int subslice_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice); - int slice_stride = sseu->max_subslices * subslice_stride; + int slice_stride = sseu->max_subslices * sseu->eu_stride; - return slice * slice_stride + subslice * subslice_stride; + return slice * slice_stride + subslice * sseu->eu_stride; } static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice, @@ -130,7 +129,7 @@ static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice, int i, offset = sseu_eu_idx(sseu, slice, subslice); u16 eu_mask = 0; - for (i = 0; i < GEN_SSEU_STRIDE(sseu->max_eus_per_subslice); i++) { + for (i = 0; i < sseu->eu_stride; i++) { eu_mask |= ((u16)sseu->eu_mask[offset + i]) << (i * BITS_PER_BYTE); } @@ -143,7 +142,7 @@ static void sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice, { int i, offset = sseu_eu_idx(sseu, slice, subslice); - for (i = 0; i < GEN_SSEU_STRIDE(sseu->max_eus_per_subslice); i++) { + for (i = 0; i < sseu->eu_stride; i++) { sseu->eu_mask[offset + i] = (eu_mask >> (BITS_PER_BYTE * i)) & 0xff; } From 33ee9e8680963fbfa298fec08eed7b45db0a1ebb Mon Sep 17 00:00:00 2001 From: Stuart Summers Date: Fri, 23 Aug 2019 09:03:01 -0700 Subject: [PATCH 012/331] drm/i915: Use local variables for subslice_mask for device info When setting up subslice_mask, instead of operating on the slice array directly, use a local variable to start bits per slice, then use this to set the per slice array in one step. Signed-off-by: Stuart Summers Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190823160307.180813-6-stuart.summers@intel.com --- drivers/gpu/drm/i915/intel_device_info.c | 49 +++++++++++++----------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index b1a79ed408ebb..52515efe9f4e8 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -235,18 +235,6 @@ static void gen10_sseu_info_init(struct drm_i915_private *dev_priv) sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >> GEN10_F2_S_ENA_SHIFT; - subslice_mask = (1 << 4) - 1; - subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >> - GEN10_F2_SS_DIS_SHIFT); - - /* - * Slice0 can have up to 3 subslices, but there are only 2 in - * slice1/2. - */ - sseu->subslice_mask[0] = subslice_mask; - for (s = 1; s < sseu->max_slices; s++) - sseu->subslice_mask[s] = subslice_mask & 0x3; - /* Slice0 */ eu_en = ~I915_READ(GEN8_EU_DISABLE0); for (ss = 0; ss < sseu->max_subslices; ss++) @@ -270,14 +258,24 @@ static void gen10_sseu_info_init(struct drm_i915_private *dev_priv) eu_en = ~I915_READ(GEN10_EU_DISABLE3); sseu_set_eus(sseu, 5, 1, eu_en & eu_mask); - /* Do a second pass where we mark the subslices disabled if all their - * eus are off. - */ + subslice_mask = (1 << 4) - 1; + subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >> + GEN10_F2_SS_DIS_SHIFT); + for (s = 0; s < sseu->max_slices; s++) { + u32 subslice_mask_with_eus = subslice_mask; + for (ss = 0; ss < sseu->max_subslices; ss++) { if (sseu_get_eus(sseu, s, ss) == 0) - sseu->subslice_mask[s] &= ~BIT(ss); + subslice_mask_with_eus &= ~BIT(ss); } + + /* + * Slice0 can have up to 3 subslices, but there are only 2 in + * slice1/2. + */ + sseu->subslice_mask[s] = s == 0 ? subslice_mask_with_eus : + subslice_mask_with_eus & 0x3; } sseu->eu_total = compute_eu_total(sseu); @@ -303,6 +301,7 @@ static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv) { struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; u32 fuse; + u8 subslice_mask = 0; fuse = I915_READ(CHV_FUSE_GT); @@ -316,7 +315,7 @@ static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv) (((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >> CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4); - sseu->subslice_mask[0] |= BIT(0); + subslice_mask |= BIT(0); sseu_set_eus(sseu, 0, 0, ~disabled_mask); } @@ -327,10 +326,12 @@ static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv) (((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >> CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4); - sseu->subslice_mask[0] |= BIT(1); + subslice_mask |= BIT(1); sseu_set_eus(sseu, 0, 1, ~disabled_mask); } + sseu->subslice_mask[0] = subslice_mask; + sseu->eu_total = compute_eu_total(sseu); /* @@ -540,6 +541,7 @@ static void haswell_sseu_info_init(struct drm_i915_private *dev_priv) { struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; u32 fuse1; + u8 subslice_mask = 0; int s, ss; /* @@ -552,16 +554,15 @@ static void haswell_sseu_info_init(struct drm_i915_private *dev_priv) /* fall through */ case 1: sseu->slice_mask = BIT(0); - sseu->subslice_mask[0] = BIT(0); + subslice_mask = BIT(0); break; case 2: sseu->slice_mask = BIT(0); - sseu->subslice_mask[0] = BIT(0) | BIT(1); + subslice_mask = BIT(0) | BIT(1); break; case 3: sseu->slice_mask = BIT(0) | BIT(1); - sseu->subslice_mask[0] = BIT(0) | BIT(1); - sseu->subslice_mask[1] = BIT(0) | BIT(1); + subslice_mask = BIT(0) | BIT(1); break; } @@ -583,10 +584,12 @@ static void haswell_sseu_info_init(struct drm_i915_private *dev_priv) } intel_sseu_set_info(sseu, hweight8(sseu->slice_mask), - hweight8(sseu->subslice_mask[0]), + hweight8(subslice_mask), sseu->eu_per_subslice); for (s = 0; s < sseu->max_slices; s++) { + sseu->subslice_mask[s] = subslice_mask; + for (ss = 0; ss < sseu->max_subslices; ss++) { sseu_set_eus(sseu, s, ss, (1UL << sseu->eu_per_subslice) - 1); From 9e8a135ed5a414ba4333cda22b01ae77291f6b9b Mon Sep 17 00:00:00 2001 From: Stuart Summers Date: Fri, 23 Aug 2019 09:03:02 -0700 Subject: [PATCH 013/331] drm/i915: Add function to set subslices Add a new function to set a set of subslices for a given slice. v2: Fix typo in subslice_mask assignment Signed-off-by: Stuart Summers Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190823160307.180813-7-stuart.summers@intel.com --- drivers/gpu/drm/i915/gt/intel_sseu.c | 6 ++++++ drivers/gpu/drm/i915/gt/intel_sseu.h | 3 +++ drivers/gpu/drm/i915/intel_device_info.c | 18 +++++++++++------- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index d52686a1afdcb..3a5db0dbac72c 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -32,6 +32,12 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu) return total; } +void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, + u8 ss_mask) +{ + sseu->subslice_mask[slice] = ss_mask; +} + unsigned int intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice) { diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index 7f2355ce963d9..7f600f50dedb2 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -78,6 +78,9 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu); unsigned int intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice); +void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, + u8 ss_mask); + u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, const struct intel_sseu *req_sseu); diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 52515efe9f4e8..1a45728ac7126 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -206,7 +206,10 @@ static void gen11_sseu_info_init(struct drm_i915_private *dev_priv) int ss; sseu->slice_mask |= BIT(s); - sseu->subslice_mask[s] = (ss_en >> ss_idx) & ss_en_mask; + + intel_sseu_set_subslices(sseu, s, (ss_en >> ss_idx) & + ss_en_mask); + for (ss = 0; ss < sseu->max_subslices; ss++) { if (sseu->subslice_mask[s] & BIT(ss)) sseu_set_eus(sseu, s, ss, eu_en); @@ -274,8 +277,9 @@ static void gen10_sseu_info_init(struct drm_i915_private *dev_priv) * Slice0 can have up to 3 subslices, but there are only 2 in * slice1/2. */ - sseu->subslice_mask[s] = s == 0 ? subslice_mask_with_eus : - subslice_mask_with_eus & 0x3; + intel_sseu_set_subslices(sseu, s, s == 0 ? + subslice_mask_with_eus : + subslice_mask_with_eus & 0x3); } sseu->eu_total = compute_eu_total(sseu); @@ -330,7 +334,7 @@ static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv) sseu_set_eus(sseu, 0, 1, ~disabled_mask); } - sseu->subslice_mask[0] = subslice_mask; + intel_sseu_set_subslices(sseu, 0, subslice_mask); sseu->eu_total = compute_eu_total(sseu); @@ -384,7 +388,7 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) /* skip disabled slice */ continue; - sseu->subslice_mask[s] = subslice_mask; + intel_sseu_set_subslices(sseu, s, subslice_mask); eu_disable = I915_READ(GEN9_EU_DISABLE(s)); for (ss = 0; ss < sseu->max_subslices; ss++) { @@ -491,7 +495,7 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) /* skip disabled slice */ continue; - sseu->subslice_mask[s] = subslice_mask; + intel_sseu_set_subslices(sseu, s, subslice_mask); for (ss = 0; ss < sseu->max_subslices; ss++) { u8 eu_disabled_mask; @@ -588,7 +592,7 @@ static void haswell_sseu_info_init(struct drm_i915_private *dev_priv) sseu->eu_per_subslice); for (s = 0; s < sseu->max_slices; s++) { - sseu->subslice_mask[s] = subslice_mask; + intel_sseu_set_subslices(sseu, s, subslice_mask); for (ss = 0; ss < sseu->max_subslices; ss++) { sseu_set_eus(sseu, s, ss, From 6db40ec80f544e289fe836319b20fcc19a454e8d Mon Sep 17 00:00:00 2001 From: Stuart Summers Date: Fri, 23 Aug 2019 09:03:03 -0700 Subject: [PATCH 014/331] drm/i915: Use subslice stride to set subslices for a given slice Add a subslice stride calculation when setting subslices. This aligns more closely with the userspace expectation of the subslice mask structure. v2: Use local variable for subslice_mask on HSW and clean up a few other subslice_mask local variable changes v3: Add GEM_BUG_ON for ss_stride to prevent array overflow (Chris) Split main set function and refactors in intel_device_info.c into separate patches (Chris) v4: Reduce ss_stride size check when setting subslices per slice based on actual expected max stride (Chris) Move that GEM_BUG_ON check for the ss_stride out to the patch which adds the ss_stride v5: Use memcpy instead of looping through each stride index Signed-off-by: Stuart Summers Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190823160307.180813-8-stuart.summers@intel.com --- drivers/gpu/drm/i915/gt/intel_sseu.c | 6 ++++-- drivers/gpu/drm/i915/gt/intel_sseu.h | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index 3a5db0dbac72c..1505042d7b5dd 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -33,9 +33,11 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu) } void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, - u8 ss_mask) + u32 ss_mask) { - sseu->subslice_mask[slice] = ss_mask; + int offset = slice * sseu->ss_stride; + + memcpy(&sseu->subslice_mask[offset], &ss_mask, sseu->ss_stride); } unsigned int diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index 7f600f50dedb2..73a9064291a2e 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -79,7 +79,7 @@ unsigned int intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice); void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, - u8 ss_mask); + u32 ss_mask); u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, const struct intel_sseu *req_sseu); From e1210bbfb1f8dc2865e8ebd6884afa895333b6e4 Mon Sep 17 00:00:00 2001 From: Stuart Summers Date: Fri, 23 Aug 2019 09:03:04 -0700 Subject: [PATCH 015/331] drm/i915: Add function to determine if a slice has a subslice Add a new function to determine whether a particular slice has a given subslice. Signed-off-by: Stuart Summers Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190823160307.180813-9-stuart.summers@intel.com --- drivers/gpu/drm/i915/gt/intel_sseu.h | 16 ++++++++++++++++ drivers/gpu/drm/i915/intel_device_info.c | 9 ++++----- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index 73a9064291a2e..7703d75f2da31 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -10,6 +10,8 @@ #include #include +#include "i915_gem.h" + struct drm_i915_private; #define GEN_MAX_SLICES (6) /* CNL upper bound */ @@ -69,6 +71,20 @@ intel_sseu_from_device_info(const struct sseu_dev_info *sseu) return value; } +static inline bool +intel_sseu_has_subslice(const struct sseu_dev_info *sseu, int slice, + int subslice) +{ + u8 mask; + int ss_idx = subslice / BITS_PER_BYTE; + + GEM_BUG_ON(ss_idx >= sseu->ss_stride); + + mask = sseu->subslice_mask[slice * sseu->ss_stride + ss_idx]; + + return mask & BIT(subslice % BITS_PER_BYTE); +} + void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices, u8 max_subslices, u8 max_eus_per_subslice); diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 1a45728ac7126..c20f74ee5f227 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -210,10 +210,9 @@ static void gen11_sseu_info_init(struct drm_i915_private *dev_priv) intel_sseu_set_subslices(sseu, s, (ss_en >> ss_idx) & ss_en_mask); - for (ss = 0; ss < sseu->max_subslices; ss++) { - if (sseu->subslice_mask[s] & BIT(ss)) + for (ss = 0; ss < sseu->max_subslices; ss++) + if (intel_sseu_has_subslice(sseu, s, ss)) sseu_set_eus(sseu, s, ss, eu_en); - } } } sseu->eu_per_subslice = hweight8(eu_en); @@ -395,7 +394,7 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) int eu_per_ss; u8 eu_disabled_mask; - if (!(sseu->subslice_mask[s] & BIT(ss))) + if (!intel_sseu_has_subslice(sseu, s, ss)) /* skip disabled subslice */ continue; @@ -501,7 +500,7 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) u8 eu_disabled_mask; u32 n_disabled; - if (!(sseu->subslice_mask[s] & BIT(ss))) + if (!intel_sseu_has_subslice(sseu, s, ss)) /* skip disabled subslice */ continue; From eaef5b3c411337db73a205d0e009c72f7a39f3a3 Mon Sep 17 00:00:00 2001 From: Stuart Summers Date: Fri, 23 Aug 2019 09:03:05 -0700 Subject: [PATCH 016/331] drm/i915: Refactor instdone loops on new subslice functions Refactor instdone loops to use the new intel_sseu_has_subslice function. Signed-off-by: Stuart Summers Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190823160307.180813-10-stuart.summers@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 3 +- drivers/gpu/drm/i915/gt/intel_engine_types.h | 30 +++++++++----------- drivers/gpu/drm/i915/gt/intel_hangcheck.c | 3 +- drivers/gpu/drm/i915/i915_debugfs.c | 5 ++-- drivers/gpu/drm/i915/i915_gpu_error.c | 5 ++-- 5 files changed, 24 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 82630db0394b3..17006d50b63f0 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -948,6 +948,7 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine, struct intel_instdone *instdone) { struct drm_i915_private *i915 = engine->i915; + const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu; struct intel_uncore *uncore = engine->uncore; u32 mmio_base = engine->mmio_base; int slice; @@ -965,7 +966,7 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine, instdone->slice_common = intel_uncore_read(uncore, GEN7_SC_INSTDONE); - for_each_instdone_slice_subslice(i915, slice, subslice) { + for_each_instdone_slice_subslice(i915, sseu, slice, subslice) { instdone->sampler[slice][subslice] = read_subslice_reg(engine, slice, subslice, GEN7_SAMPLER_INSTDONE); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index a82cea95c2f2d..15e02cb58a678 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -576,20 +576,18 @@ intel_engine_is_virtual(const struct intel_engine_cs *engine) return engine->flags & I915_ENGINE_IS_VIRTUAL; } -#define instdone_slice_mask(dev_priv__) \ - (IS_GEN(dev_priv__, 7) ? \ - 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask) - -#define instdone_subslice_mask(dev_priv__) \ - (IS_GEN(dev_priv__, 7) ? \ - 1 : RUNTIME_INFO(dev_priv__)->sseu.subslice_mask[0]) - -#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \ - for ((slice__) = 0, (subslice__) = 0; \ - (slice__) < I915_MAX_SLICES; \ - (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? (subslice__) + 1 : 0, \ - (slice__) += ((subslice__) == 0)) \ - for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \ - (BIT(subslice__) & instdone_subslice_mask(dev_priv__))) - +#define instdone_has_slice(dev_priv___, sseu___, slice___) \ + ((IS_GEN(dev_priv___, 7) ? 1 : ((sseu___)->slice_mask)) & BIT(slice___)) + +#define instdone_has_subslice(dev_priv__, sseu__, slice__, subslice__) \ + (IS_GEN(dev_priv__, 7) ? (1 & BIT(subslice__)) : \ + intel_sseu_has_subslice(sseu__, 0, subslice__)) + +#define for_each_instdone_slice_subslice(dev_priv_, sseu_, slice_, subslice_) \ + for ((slice_) = 0, (subslice_) = 0; (slice_) < I915_MAX_SLICES; \ + (subslice_) = ((subslice_) + 1) % I915_MAX_SUBSLICES, \ + (slice_) += ((subslice_) == 0)) \ + for_each_if((instdone_has_slice(dev_priv_, sseu_, slice_)) && \ + (instdone_has_subslice(dev_priv_, sseu_, slice_, \ + subslice_))) #endif /* __INTEL_ENGINE_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c index 05d042cdefe24..40f62f780be5c 100644 --- a/drivers/gpu/drm/i915/gt/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/intel_hangcheck.c @@ -53,6 +53,7 @@ static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone) static bool subunits_stuck(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; + const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; struct intel_instdone instdone; struct intel_instdone *accu_instdone = &engine->hangcheck.instdone; bool stuck; @@ -71,7 +72,7 @@ static bool subunits_stuck(struct intel_engine_cs *engine) stuck &= instdone_unchanged(instdone.slice_common, &accu_instdone->slice_common); - for_each_instdone_slice_subslice(dev_priv, slice, subslice) { + for_each_instdone_slice_subslice(dev_priv, sseu, slice, subslice) { stuck &= instdone_unchanged(instdone.sampler[slice][subslice], &accu_instdone->sampler[slice][subslice]); stuck &= instdone_unchanged(instdone.row[slice][subslice], diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 28713c9c98eab..44797f8b7c501 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -996,6 +996,7 @@ static void i915_instdone_info(struct drm_i915_private *dev_priv, struct seq_file *m, struct intel_instdone *instdone) { + const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; int slice; int subslice; @@ -1011,11 +1012,11 @@ static void i915_instdone_info(struct drm_i915_private *dev_priv, if (INTEL_GEN(dev_priv) <= 6) return; - for_each_instdone_slice_subslice(dev_priv, slice, subslice) + for_each_instdone_slice_subslice(dev_priv, sseu, slice, subslice) seq_printf(m, "\t\tSAMPLER_INSTDONE[%d][%d]: 0x%08x\n", slice, subslice, instdone->sampler[slice][subslice]); - for_each_instdone_slice_subslice(dev_priv, slice, subslice) + for_each_instdone_slice_subslice(dev_priv, sseu, slice, subslice) seq_printf(m, "\t\tROW_INSTDONE[%d][%d]: 0x%08x\n", slice, subslice, instdone->row[slice][subslice]); } diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index e284bd76fa866..4aff342b89443 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -421,6 +421,7 @@ static void err_compression_marker(struct drm_i915_error_state_buf *m) static void error_print_instdone(struct drm_i915_error_state_buf *m, const struct drm_i915_error_engine *ee) { + const struct sseu_dev_info *sseu = &RUNTIME_INFO(m->i915)->sseu; int slice; int subslice; @@ -436,12 +437,12 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m, if (INTEL_GEN(m->i915) <= 6) return; - for_each_instdone_slice_subslice(m->i915, slice, subslice) + for_each_instdone_slice_subslice(m->i915, sseu, slice, subslice) err_printf(m, " SAMPLER_INSTDONE[%d][%d]: 0x%08x\n", slice, subslice, ee->instdone.sampler[slice][subslice]); - for_each_instdone_slice_subslice(m->i915, slice, subslice) + for_each_instdone_slice_subslice(m->i915, sseu, slice, subslice) err_printf(m, " ROW_INSTDONE[%d][%d]: 0x%08x\n", slice, subslice, ee->instdone.row[slice][subslice]); From 668df17f594d6f57e822ceee680ace3233e97f02 Mon Sep 17 00:00:00 2001 From: Stuart Summers Date: Fri, 23 Aug 2019 09:03:06 -0700 Subject: [PATCH 017/331] drm/i915: Add new function to copy subslices for a slice Add a new function to copy subslices for a specified slice between intel_sseu structures for the purpose of determining power-gate status. Note that currently ss_stride has a max of 1. Signed-off-by: Stuart Summers Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190823160307.180813-11-stuart.summers@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 44797f8b7c501..6e8b402999399 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3726,6 +3726,15 @@ i915_cache_sharing_set(void *data, u64 val) return 0; } +static void +intel_sseu_copy_subslices(const struct sseu_dev_info *sseu, int slice, + u8 *to_mask) +{ + int offset = slice * sseu->ss_stride; + + memcpy(&to_mask[offset], &sseu->subslice_mask[offset], sseu->ss_stride); +} + DEFINE_SIMPLE_ATTRIBUTE(i915_cache_sharing_fops, i915_cache_sharing_get, i915_cache_sharing_set, "%llu\n"); @@ -3799,7 +3808,7 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv, continue; sseu->slice_mask |= BIT(s); - sseu->subslice_mask[s] = info->sseu.subslice_mask[s]; + intel_sseu_copy_subslices(&info->sseu, s, sseu->subslice_mask); for (ss = 0; ss < info->sseu.max_subslices; ss++) { unsigned int eu_cnt; @@ -3850,7 +3859,8 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, sseu->slice_mask |= BIT(s); if (IS_GEN9_BC(dev_priv)) - sseu->subslice_mask[s] = info->sseu.subslice_mask[s]; + intel_sseu_copy_subslices(&info->sseu, s, + sseu->subslice_mask); for (ss = 0; ss < info->sseu.max_subslices; ss++) { unsigned int eu_cnt; @@ -3886,7 +3896,8 @@ static void broadwell_sseu_device_status(struct drm_i915_private *dev_priv, if (sseu->slice_mask) { sseu->eu_per_subslice = info->sseu.eu_per_subslice; for (s = 0; s < fls(sseu->slice_mask); s++) - sseu->subslice_mask[s] = info->sseu.subslice_mask[s]; + intel_sseu_copy_subslices(&info->sseu, s, + sseu->subslice_mask); sseu->eu_total = sseu->eu_per_subslice * intel_sseu_subslice_total(sseu); From 100f5f7fbc3e23f58511a11a3751dfacf1ccb5a7 Mon Sep 17 00:00:00 2001 From: Stuart Summers Date: Fri, 23 Aug 2019 09:03:07 -0700 Subject: [PATCH 018/331] drm/i915: Expand subslice mask Currently, the subslice_mask runtime parameter is stored as an array of subslices per slice. Expand the subslice mask array to better match what is presented to userspace through the I915_QUERY_TOPOLOGY_INFO ioctl. The index into this array is then calculated: slice * subslice stride + subslice index / 8 v2: Fix 32-bit build v3: Use new helper function in SSEU workaround warning message v4: Use GEM_BUG_ON to force developers to use valid SSEU configurations per platform (Chris) Signed-off-by: Stuart Summers Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190823160307.180813-12-stuart.summers@intel.com --- drivers/gpu/drm/i915/gt/intel_sseu.c | 16 +++++++++++++++- drivers/gpu/drm/i915/gt/intel_sseu.h | 4 +++- drivers/gpu/drm/i915/gt/intel_workarounds.c | 5 ++--- drivers/gpu/drm/i915/i915_debugfs.c | 5 ++++- drivers/gpu/drm/i915/intel_device_info.c | 8 ++++---- 5 files changed, 28 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index 1505042d7b5dd..74f793423231b 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -32,6 +32,20 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu) return total; } +u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice) +{ + int i, offset = slice * sseu->ss_stride; + u32 mask = 0; + + GEM_BUG_ON(slice >= sseu->max_slices); + + for (i = 0; i < sseu->ss_stride; i++) + mask |= (u32)sseu->subslice_mask[offset + i] << + i * BITS_PER_BYTE; + + return mask; +} + void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, u32 ss_mask) { @@ -43,7 +57,7 @@ void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, unsigned int intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice) { - return hweight8(sseu->subslice_mask[slice]); + return hweight32(intel_sseu_get_subslices(sseu, slice)); } u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index 7703d75f2da31..4070f6ff1db60 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -23,7 +23,7 @@ struct drm_i915_private; struct sseu_dev_info { u8 slice_mask; - u8 subslice_mask[GEN_MAX_SLICES]; + u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE]; u16 eu_total; u8 eu_per_subslice; u8 min_eu_in_pool; @@ -94,6 +94,8 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu); unsigned int intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice); +u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice); + void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, u32 ss_mask); diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 126ab36679196..ad2261e0cba80 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -801,11 +801,10 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) } slice = fls(sseu->slice_mask) - 1; - GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask)); - subslice = fls(l3_en & sseu->subslice_mask[slice]); + subslice = fls(l3_en & intel_sseu_get_subslices(sseu, slice)); if (!subslice) { DRM_WARN("No common index found between subslice mask %x and L3 bank mask %x!\n", - sseu->subslice_mask[slice], l3_en); + intel_sseu_get_subslices(sseu, slice), l3_en); subslice = fls(l3_en); WARN_ON(!subslice); } diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 6e8b402999399..8c1d704254240 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3864,13 +3864,16 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, for (ss = 0; ss < info->sseu.max_subslices; ss++) { unsigned int eu_cnt; + u8 ss_idx = s * info->sseu.ss_stride + + ss / BITS_PER_BYTE; if (IS_GEN9_LP(dev_priv)) { if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss)))) /* skip disabled subslice */ continue; - sseu->subslice_mask[s] |= BIT(ss); + sseu->subslice_mask[ss_idx] |= + BIT(ss % BITS_PER_BYTE); } eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] & diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index c20f74ee5f227..d9b5baaef5d0e 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -93,9 +93,9 @@ static void sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p) hweight8(sseu->slice_mask), sseu->slice_mask); drm_printf(p, "subslice total: %u\n", intel_sseu_subslice_total(sseu)); for (s = 0; s < sseu->max_slices; s++) { - drm_printf(p, "slice%d: %u subslices, mask=%04x\n", + drm_printf(p, "slice%d: %u subslices, mask=%08x\n", s, intel_sseu_subslices_per_slice(sseu, s), - sseu->subslice_mask[s]); + intel_sseu_get_subslices(sseu, s)); } drm_printf(p, "EU total: %u\n", sseu->eu_total); drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice); @@ -159,9 +159,9 @@ void intel_device_info_dump_topology(const struct sseu_dev_info *sseu, } for (s = 0; s < sseu->max_slices; s++) { - drm_printf(p, "slice%d: %u subslice(s) (0x%hhx):\n", + drm_printf(p, "slice%d: %u subslice(s) (0x%08x):\n", s, intel_sseu_subslices_per_slice(sseu, s), - sseu->subslice_mask[s]); + intel_sseu_get_subslices(sseu, s)); for (ss = 0; ss < sseu->max_subslices; ss++) { u16 enabled_eus = sseu_get_eus(sseu, s, ss); From d06a79d33e0f97373a29decb66791758a6d2e968 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 21 Aug 2019 20:30:29 +0300 Subject: [PATCH 019/331] drm/i915: Use enum pipe instead of crtc index to track active pipes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We may need to eliminate the crtc->index == pipe assumptions from the code to support arbitrary pipes being fused off. Start that by switching some bitmasks over to using pipe instead of the crtc index. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190821173033.24123-1-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_cdclk.c | 12 +++++------ drivers/gpu/drm/i915/display/intel_display.c | 20 +++++++++---------- .../drm/i915/display/intel_display_types.h | 4 ++-- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/intel_pm.c | 20 +++++++++---------- 5 files changed, 29 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index d0bc42e5039c4..939088c7d8143 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2369,7 +2369,7 @@ static int vlv_modeset_calc_cdclk(struct intel_atomic_state *state) state->cdclk.logical.voltage_level = vlv_calc_voltage_level(dev_priv, cdclk); - if (!state->active_crtcs) { + if (!state->active_pipes) { cdclk = vlv_calc_cdclk(dev_priv, state->cdclk.force_min_cdclk); state->cdclk.actual.cdclk = cdclk; @@ -2400,7 +2400,7 @@ static int bdw_modeset_calc_cdclk(struct intel_atomic_state *state) state->cdclk.logical.voltage_level = bdw_calc_voltage_level(cdclk); - if (!state->active_crtcs) { + if (!state->active_pipes) { cdclk = bdw_calc_cdclk(state->cdclk.force_min_cdclk); state->cdclk.actual.cdclk = cdclk; @@ -2470,7 +2470,7 @@ static int skl_modeset_calc_cdclk(struct intel_atomic_state *state) state->cdclk.logical.voltage_level = skl_calc_voltage_level(cdclk); - if (!state->active_crtcs) { + if (!state->active_pipes) { cdclk = skl_calc_cdclk(state->cdclk.force_min_cdclk, vco); state->cdclk.actual.vco = vco; @@ -2506,7 +2506,7 @@ static int bxt_modeset_calc_cdclk(struct intel_atomic_state *state) state->cdclk.logical.voltage_level = bxt_calc_voltage_level(cdclk); - if (!state->active_crtcs) { + if (!state->active_pipes) { if (IS_GEMINILAKE(dev_priv)) { cdclk = glk_calc_cdclk(state->cdclk.force_min_cdclk); vco = glk_de_pll_vco(dev_priv, cdclk); @@ -2544,7 +2544,7 @@ static int cnl_modeset_calc_cdclk(struct intel_atomic_state *state) max(cnl_calc_voltage_level(cdclk), cnl_compute_min_voltage_level(state)); - if (!state->active_crtcs) { + if (!state->active_pipes) { cdclk = cnl_calc_cdclk(state->cdclk.force_min_cdclk); vco = cnl_cdclk_pll_vco(dev_priv, cdclk); @@ -2578,7 +2578,7 @@ static int icl_modeset_calc_cdclk(struct intel_atomic_state *state) max(icl_calc_voltage_level(dev_priv, cdclk), cnl_compute_min_voltage_level(state)); - if (!state->active_crtcs) { + if (!state->active_pipes) { cdclk = icl_calc_cdclk(state->cdclk.force_min_cdclk, ref); vco = icl_calc_cdclk_pll_vco(dev_priv, cdclk); diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index b51d1ceb87393..7b74df03a1ef2 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -7080,7 +7080,7 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc, intel_display_power_put_unchecked(dev_priv, domain); intel_crtc->enabled_power_domains = 0; - dev_priv->active_crtcs &= ~(1 << intel_crtc->pipe); + dev_priv->active_pipes &= ~BIT(intel_crtc->pipe); dev_priv->min_cdclk[intel_crtc->pipe] = 0; dev_priv->min_voltage_level[intel_crtc->pipe] = 0; @@ -13452,7 +13452,7 @@ static int intel_modeset_checks(struct intel_atomic_state *state) state->cdclk.force_min_cdclk = dev_priv->cdclk.force_min_cdclk; state->modeset = true; - state->active_crtcs = dev_priv->active_crtcs; + state->active_pipes = dev_priv->active_pipes; state->cdclk.logical = dev_priv->cdclk.logical; state->cdclk.actual = dev_priv->cdclk.actual; state->cdclk.pipe = INVALID_PIPE; @@ -13460,12 +13460,12 @@ static int intel_modeset_checks(struct intel_atomic_state *state) for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { if (new_crtc_state->base.active) - state->active_crtcs |= 1 << i; + state->active_pipes |= BIT(crtc->pipe); else - state->active_crtcs &= ~(1 << i); + state->active_pipes &= ~BIT(crtc->pipe); if (old_crtc_state->base.active != new_crtc_state->base.active) - state->active_pipe_changes |= drm_crtc_mask(&crtc->base); + state->active_pipe_changes |= BIT(crtc->pipe); } /* @@ -13494,11 +13494,11 @@ static int intel_modeset_checks(struct intel_atomic_state *state) return ret; } - if (is_power_of_2(state->active_crtcs)) { + if (is_power_of_2(state->active_pipes)) { struct intel_crtc *crtc; struct intel_crtc_state *crtc_state; - pipe = ilog2(state->active_crtcs); + pipe = ilog2(state->active_pipes); crtc = intel_get_crtc_for_pipe(dev_priv, pipe); crtc_state = intel_atomic_get_new_crtc_state(state, crtc); if (crtc_state && needs_modeset(crtc_state)) @@ -14191,7 +14191,7 @@ static int intel_atomic_commit(struct drm_device *dev, sizeof(state->min_cdclk)); memcpy(dev_priv->min_voltage_level, state->min_voltage_level, sizeof(state->min_voltage_level)); - dev_priv->active_crtcs = state->active_crtcs; + dev_priv->active_pipes = state->active_pipes; dev_priv->cdclk.force_min_cdclk = state->cdclk.force_min_cdclk; intel_cdclk_swap_state(state); @@ -16640,7 +16640,7 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) struct drm_connector_list_iter conn_iter; int i; - dev_priv->active_crtcs = 0; + dev_priv->active_pipes = 0; for_each_intel_crtc(dev, crtc) { struct intel_crtc_state *crtc_state = @@ -16657,7 +16657,7 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) crtc->active = crtc_state->base.active; if (crtc_state->base.active) - dev_priv->active_crtcs |= 1 << crtc->pipe; + dev_priv->active_pipes |= BIT(crtc->pipe); DRM_DEBUG_KMS("[CRTC:%d:%s] hw state readout: %s\n", crtc->base.base.id, crtc->base.name, diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 449abaea619fc..12523456143f4 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -481,9 +481,9 @@ struct intel_atomic_state { * but the converse is not necessarily true; simply changing a mode may * not flip the final active status of any CRTC's */ - unsigned int active_pipe_changes; + u8 active_pipe_changes; - unsigned int active_crtcs; + u8 active_pipes; /* minimum acceptable cdclk for each pipe */ int min_cdclk[I915_MAX_PIPES]; /* minimum acceptable voltage level for each pipe */ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 048896b1348b9..ba3f6d82ef26a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1468,7 +1468,7 @@ struct drm_i915_private { */ struct mutex dpll_lock; - unsigned int active_crtcs; + u8 active_pipes; /* minimum acceptable cdclk for each pipe */ int min_cdclk[I915_MAX_PIPES]; /* minimum acceptable voltage level for each pipe */ diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index d3ea193cd0939..09f29a337313f 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3761,18 +3761,18 @@ bool intel_can_enable_sagv(struct intel_atomic_state *state) /* * If there are no active CRTCs, no additional checks need be performed */ - if (hweight32(state->active_crtcs) == 0) + if (hweight32(state->active_pipes) == 0) return true; /* * SKL+ workaround: bspec recommends we disable SAGV when we have * more then one pipe enabled */ - if (hweight32(state->active_crtcs) > 1) + if (hweight32(state->active_pipes) > 1) return false; /* Since we're now guaranteed to only have one active CRTC... */ - pipe = ffs(state->active_crtcs) - 1; + pipe = ffs(state->active_pipes) - 1; crtc = intel_get_crtc_for_pipe(dev_priv, pipe); crtc_state = to_intel_crtc_state(crtc->base.state); @@ -3867,14 +3867,14 @@ skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv, if (WARN_ON(!state) || !crtc_state->base.active) { alloc->start = 0; alloc->end = 0; - *num_active = hweight32(dev_priv->active_crtcs); + *num_active = hweight32(dev_priv->active_pipes); return; } if (intel_state->active_pipe_changes) - *num_active = hweight32(intel_state->active_crtcs); + *num_active = hweight32(intel_state->active_pipes); else - *num_active = hweight32(dev_priv->active_crtcs); + *num_active = hweight32(dev_priv->active_pipes); ddb_size = intel_get_ddb_size(dev_priv, crtc_state, total_data_rate, *num_active, ddb); @@ -5464,7 +5464,7 @@ skl_ddb_add_affected_pipes(struct intel_atomic_state *state, bool *changed) * If this transaction isn't actually touching any CRTC's, don't * bother with watermark calculation. Note that if we pass this * test, we're guaranteed to hold at least one CRTC state mutex, - * which means we can safely use values like dev_priv->active_crtcs + * which means we can safely use values like dev_priv->active_pipes * since any racing commits that want to update them would need to * hold _all_ CRTC state mutexes. */ @@ -5489,13 +5489,13 @@ skl_ddb_add_affected_pipes(struct intel_atomic_state *state, bool *changed) state->active_pipe_changes = ~0; /* - * We usually only initialize state->active_crtcs if we + * We usually only initialize state->active_pipes if we * we're doing a modeset; make sure this field is always * initialized during the sanitization process that happens * on the first commit too. */ if (!state->modeset) - state->active_crtcs = dev_priv->active_crtcs; + state->active_pipes = dev_priv->active_pipes; } /* @@ -5811,7 +5811,7 @@ void skl_wm_get_hw_state(struct drm_i915_private *dev_priv) hw->dirty_pipes |= drm_crtc_mask(&crtc->base); } - if (dev_priv->active_crtcs) { + if (dev_priv->active_pipes) { /* Fully recompute DDB on first atomic commit */ dev_priv->wm.distrust_bios_wm = true; } From e8edae54c593bc9aea949fbf6b2a59fe53cd5d83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 21 Aug 2019 20:30:30 +0300 Subject: [PATCH 020/331] drm/i915: Unconfuse pipe vs. crtc->index in i915_get_crtc_scanoutpos() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "pipe" argument passed in by the vblank code is in fact the crtc index. Don't assume that is the same as the pipe. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190821173033.24123-2-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/i915_irq.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 77391d8325bf3..8ac6f68499812 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -942,14 +942,14 @@ static int __intel_get_crtc_scanline(struct intel_crtc *crtc) return (position + crtc->scanline_offset) % vtotal; } -bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, +bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int index, bool in_vblank_irq, int *vpos, int *hpos, ktime_t *stime, ktime_t *etime, const struct drm_display_mode *mode) { struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = intel_get_crtc_for_pipe(dev_priv, - pipe); + struct intel_crtc *crtc = to_intel_crtc(drm_crtc_from_index(dev, index)); + enum pipe pipe = crtc->pipe; int position; int vbl_start, vbl_end, hsync_start, htotal, vtotal; unsigned long irqflags; @@ -992,7 +992,7 @@ bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, /* No obvious pixelcount register. Only query vertical * scanout position from Display scan line register. */ - position = __intel_get_crtc_scanline(intel_crtc); + position = __intel_get_crtc_scanline(crtc); } else { /* Have access to pixelcount since start of frame. * We can split this into vertical and horizontal From d048a2684a413b33a28be277a7e9cee960ec8bb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 21 Aug 2019 20:30:31 +0300 Subject: [PATCH 021/331] drm/i915: Use enum pipe consistently MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace all "int pipe"s with "enum pipe pipe"s to make it clear what we're dealing with. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190821173033.24123-3-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display.c | 42 +++++++++---------- .../drm/i915/display/intel_display_types.h | 2 +- drivers/gpu/drm/i915/display/intel_dvo.c | 2 +- drivers/gpu/drm/i915/display/intel_lvds.c | 2 +- drivers/gpu/drm/i915/display/vlv_dsi.c | 2 +- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_irq.c | 11 ++--- drivers/gpu/drm/i915/intel_pm.c | 2 +- 8 files changed, 33 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 7b74df03a1ef2..ea2915dde6abb 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -490,7 +490,7 @@ static const struct intel_limit intel_limits_bxt = { /* WA Display #0827: Gen9:all */ static void -skl_wa_827(struct drm_i915_private *dev_priv, int pipe, bool enable) +skl_wa_827(struct drm_i915_private *dev_priv, enum pipe pipe, bool enable) { if (enable) I915_WRITE(CLKGATE_DIS_PSL(pipe), @@ -4421,7 +4421,7 @@ static void intel_fdi_normal_train(struct intel_crtc *crtc) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - int pipe = crtc->pipe; + enum pipe pipe = crtc->pipe; i915_reg_t reg; u32 temp; @@ -4464,7 +4464,7 @@ static void ironlake_fdi_link_train(struct intel_crtc *crtc, { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - int pipe = crtc->pipe; + enum pipe pipe = crtc->pipe; i915_reg_t reg; u32 temp, tries; @@ -4565,7 +4565,7 @@ static void gen6_fdi_link_train(struct intel_crtc *crtc, { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - int pipe = crtc->pipe; + enum pipe pipe = crtc->pipe; i915_reg_t reg; u32 temp, i, retry; @@ -4698,7 +4698,7 @@ static void ivb_manual_fdi_link_train(struct intel_crtc *crtc, { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - int pipe = crtc->pipe; + enum pipe pipe = crtc->pipe; i915_reg_t reg; u32 temp, i, j; @@ -4816,7 +4816,7 @@ static void ironlake_fdi_pll_enable(const struct intel_crtc_state *crtc_state) { struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev); - int pipe = intel_crtc->pipe; + enum pipe pipe = intel_crtc->pipe; i915_reg_t reg; u32 temp; @@ -4853,7 +4853,7 @@ static void ironlake_fdi_pll_disable(struct intel_crtc *intel_crtc) { struct drm_device *dev = intel_crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - int pipe = intel_crtc->pipe; + enum pipe pipe = intel_crtc->pipe; i915_reg_t reg; u32 temp; @@ -4884,7 +4884,7 @@ static void ironlake_fdi_disable(struct drm_crtc *crtc) struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + enum pipe pipe = intel_crtc->pipe; i915_reg_t reg; u32 temp; @@ -5199,7 +5199,7 @@ static void ironlake_pch_enable(const struct intel_atomic_state *state, struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - int pipe = crtc->pipe; + enum pipe pipe = crtc->pipe; u32 temp; assert_pch_transcoder_disabled(dev_priv, pipe); @@ -5294,7 +5294,7 @@ static void lpt_pch_enable(const struct intel_atomic_state *state, lpt_enable_pch_transcoder(dev_priv, cpu_transcoder); } -static void cpt_verify_modeset(struct drm_device *dev, int pipe) +static void cpt_verify_modeset(struct drm_device *dev, enum pipe pipe) { struct drm_i915_private *dev_priv = to_i915(dev); i915_reg_t dslreg = PIPEDSL(pipe); @@ -5633,7 +5633,7 @@ static void ironlake_pfit_enable(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - int pipe = crtc->pipe; + enum pipe pipe = crtc->pipe; if (crtc_state->pch_pfit.enabled) { /* Force use of hard-coded filter coefficients @@ -5746,7 +5746,7 @@ intel_post_enable_primary(struct drm_crtc *crtc, struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + enum pipe pipe = intel_crtc->pipe; /* * Gen2 reports pipe underruns whenever all planes are disabled. @@ -5770,7 +5770,7 @@ intel_pre_disable_primary_noatomic(struct drm_crtc *crtc) struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + enum pipe pipe = intel_crtc->pipe; /* * Gen2 reports pipe underruns whenever all planes are disabled. @@ -6293,7 +6293,7 @@ static void ironlake_crtc_enable(struct intel_crtc_state *pipe_config, struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + enum pipe pipe = intel_crtc->pipe; if (WARN_ON(intel_crtc->active)) return; @@ -6426,7 +6426,7 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, struct drm_crtc *crtc = pipe_config->base.crtc; struct drm_i915_private *dev_priv = to_i915(crtc->dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe, hsw_workaround_pipe; + enum pipe pipe = intel_crtc->pipe, hsw_workaround_pipe; enum transcoder cpu_transcoder = pipe_config->cpu_transcoder; bool psl_clkgate_wa; @@ -6552,7 +6552,7 @@ static void ironlake_crtc_disable(struct intel_crtc_state *old_crtc_state, struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + enum pipe pipe = intel_crtc->pipe; /* * Sometimes spurious CPU pipe underruns happen when the @@ -6839,7 +6839,7 @@ static void valleyview_crtc_enable(struct intel_crtc_state *pipe_config, struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + enum pipe pipe = intel_crtc->pipe; if (WARN_ON(intel_crtc->active)) return; @@ -6971,7 +6971,7 @@ static void i9xx_crtc_disable(struct intel_crtc_state *old_crtc_state, struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + enum pipe pipe = intel_crtc->pipe; /* * On gen2 planes are double buffered but the pipe isn't, so we must @@ -8543,7 +8543,7 @@ static void vlv_crtc_clock_get(struct intel_crtc *crtc, { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - int pipe = pipe_config->cpu_transcoder; + enum pipe pipe = crtc->pipe; struct dpll clock; u32 mdiv; int refclk = 100000; @@ -8653,7 +8653,7 @@ static void chv_crtc_clock_get(struct intel_crtc *crtc, { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - int pipe = pipe_config->cpu_transcoder; + enum pipe pipe = crtc->pipe; enum dpio_channel port = vlv_pipe_to_channel(pipe); struct dpll clock; u32 cmn_dw13, pll_dw0, pll_dw1, pll_dw2, pll_dw3; @@ -11265,7 +11265,7 @@ static void i9xx_crtc_clock_get(struct intel_crtc *crtc, { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - int pipe = pipe_config->cpu_transcoder; + enum pipe pipe = crtc->pipe; u32 dpll = pipe_config->dpll_hw_state.dpll; u32 fp; struct dpll clock; diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 12523456143f4..96514dcc78128 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -1509,7 +1509,7 @@ intel_wait_for_vblank(struct drm_i915_private *dev_priv, enum pipe pipe) drm_wait_one_vblank(&dev_priv->drm, pipe); } static inline void -intel_wait_for_vblank_if_active(struct drm_i915_private *dev_priv, int pipe) +intel_wait_for_vblank_if_active(struct drm_i915_private *dev_priv, enum pipe pipe) { const struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); diff --git a/drivers/gpu/drm/i915/display/intel_dvo.c b/drivers/gpu/drm/i915/display/intel_dvo.c index 93baf366692ea..34193d04597a8 100644 --- a/drivers/gpu/drm/i915/display/intel_dvo.c +++ b/drivers/gpu/drm/i915/display/intel_dvo.c @@ -280,7 +280,7 @@ static void intel_dvo_pre_enable(struct intel_encoder *encoder, struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; struct intel_dvo *intel_dvo = enc_to_dvo(encoder); - int pipe = crtc->pipe; + enum pipe pipe = crtc->pipe; u32 dvo_val; i915_reg_t dvo_reg = intel_dvo->dev.dvo_reg; i915_reg_t dvo_srcdim_reg = intel_dvo->dev.dvo_srcdim_reg; diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c index b7c459a8931c7..c786abdc3336c 100644 --- a/drivers/gpu/drm/i915/display/intel_lvds.c +++ b/drivers/gpu/drm/i915/display/intel_lvds.c @@ -232,7 +232,7 @@ static void intel_pre_enable_lvds(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; - int pipe = crtc->pipe; + enum pipe pipe = crtc->pipe; u32 temp; if (HAS_PCH_SPLIT(dev_priv)) { diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index a71b22bdd95b5..50064cde0724e 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -749,7 +749,7 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, struct drm_crtc *crtc = pipe_config->base.crtc; struct drm_i915_private *dev_priv = to_i915(crtc->dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + enum pipe pipe = intel_crtc->pipe; enum port port; u32 val; bool glk_cold_boot = false; diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 8c1d704254240..5c1a2b1e7d34b 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -376,7 +376,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data) static void gen8_display_interrupt_info(struct seq_file *m) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - int pipe; + enum pipe pipe; for_each_pipe(dev_priv, pipe) { enum intel_display_power_domain power_domain; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 8ac6f68499812..3f1b6ee157baf 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1716,7 +1716,7 @@ static void i9xx_pipestat_irq_reset(struct drm_i915_private *dev_priv) static void i9xx_pipestat_irq_ack(struct drm_i915_private *dev_priv, u32 iir, u32 pipe_stats[I915_MAX_PIPES]) { - int pipe; + enum pipe pipe; spin_lock(&dev_priv->irq_lock); @@ -1741,6 +1741,7 @@ static void i9xx_pipestat_irq_ack(struct drm_i915_private *dev_priv, status_mask = PIPE_FIFO_UNDERRUN_STATUS; switch (pipe) { + default: case PIPE_A: iir_bit = I915_DISPLAY_PIPE_A_EVENT_INTERRUPT; break; @@ -2136,7 +2137,7 @@ static void ibx_hpd_irq_handler(struct drm_i915_private *dev_priv, static void ibx_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) { - int pipe; + enum pipe pipe; u32 hotplug_trigger = pch_iir & SDE_HOTPLUG_MASK; ibx_hpd_irq_handler(dev_priv, hotplug_trigger, hpd_ibx); @@ -2222,7 +2223,7 @@ static void cpt_serr_int_handler(struct drm_i915_private *dev_priv) static void cpt_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) { - int pipe; + enum pipe pipe; u32 hotplug_trigger = pch_iir & SDE_HOTPLUG_MASK_CPT; ibx_hpd_irq_handler(dev_priv, hotplug_trigger, hpd_cpt); @@ -3246,7 +3247,7 @@ static void valleyview_irq_reset(struct drm_i915_private *dev_priv) static void gen8_irq_reset(struct drm_i915_private *dev_priv) { struct intel_uncore *uncore = &dev_priv->uncore; - int pipe; + enum pipe pipe; gen8_master_intr_disable(dev_priv->uncore.regs); @@ -3271,7 +3272,7 @@ static void gen8_irq_reset(struct drm_i915_private *dev_priv) static void gen11_irq_reset(struct drm_i915_private *dev_priv) { struct intel_uncore *uncore = &dev_priv->uncore; - int pipe; + enum pipe pipe; gen11_master_intr_disable(dev_priv->uncore.regs); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 09f29a337313f..437cd50e5d067 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -8858,7 +8858,7 @@ static void ilk_init_clock_gating(struct drm_i915_private *dev_priv) static void cpt_init_clock_gating(struct drm_i915_private *dev_priv) { - int pipe; + enum pipe pipe; u32 val; /* From c08e91323920ec4b3a4d889af3988e300b1902f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 21 Aug 2019 20:30:32 +0300 Subject: [PATCH 022/331] drm/i915: s/num_active_crtcs/num_active_pipes/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set a good example and talk about pipes rather than crtcs. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190821173033.24123-4-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/intel_pm.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 437cd50e5d067..b4b9609db0921 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1490,7 +1490,7 @@ static void g4x_merge_wm(struct drm_i915_private *dev_priv, struct g4x_wm_values *wm) { struct intel_crtc *crtc; - int num_active_crtcs = 0; + int num_active_pipes = 0; wm->cxsr = true; wm->hpll_en = true; @@ -1509,10 +1509,10 @@ static void g4x_merge_wm(struct drm_i915_private *dev_priv, if (!wm_state->fbc_en) wm->fbc_en = false; - num_active_crtcs++; + num_active_pipes++; } - if (num_active_crtcs != 1) { + if (num_active_pipes != 1) { wm->cxsr = false; wm->hpll_en = false; wm->fbc_en = false; @@ -2098,7 +2098,7 @@ static void vlv_merge_wm(struct drm_i915_private *dev_priv, struct vlv_wm_values *wm) { struct intel_crtc *crtc; - int num_active_crtcs = 0; + int num_active_pipes = 0; wm->level = dev_priv->wm.max_level; wm->cxsr = true; @@ -2112,14 +2112,14 @@ static void vlv_merge_wm(struct drm_i915_private *dev_priv, if (!wm_state->cxsr) wm->cxsr = false; - num_active_crtcs++; + num_active_pipes++; wm->level = min_t(int, wm->level, wm_state->num_levels - 1); } - if (num_active_crtcs != 1) + if (num_active_pipes != 1) wm->cxsr = false; - if (num_active_crtcs > 1) + if (num_active_pipes > 1) wm->level = VLV_WM_LEVEL_PM2; for_each_intel_crtc(&dev_priv->drm, crtc) { From 0b14d96820d1fd05ea610a0f5f952b34e5a24f2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 21 Aug 2019 20:30:33 +0300 Subject: [PATCH 023/331] drm/i915: Use hweight8() for 8bit masks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use hweight8() instead of hweight32() for 8bit masks. Doesn't actually matter for us since the arch code will go for hweight32() anyway, but maybe we stil want to do this for documentation purposes? Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190821173033.24123-5-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/intel_pm.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index b4b9609db0921..4fa9bc83c8b4f 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1327,8 +1327,8 @@ static int g4x_compute_pipe_wm(struct intel_crtc_state *crtc_state) struct intel_atomic_state *state = to_intel_atomic_state(crtc_state->base.state); struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal; - int num_active_planes = hweight32(crtc_state->active_planes & - ~BIT(PLANE_CURSOR)); + int num_active_planes = hweight8(crtc_state->active_planes & + ~BIT(PLANE_CURSOR)); const struct g4x_pipe_wm *raw; const struct intel_plane_state *old_plane_state; const struct intel_plane_state *new_plane_state; @@ -1659,7 +1659,7 @@ static int vlv_compute_fifo(struct intel_crtc_state *crtc_state) &crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2]; struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state; unsigned int active_planes = crtc_state->active_planes & ~BIT(PLANE_CURSOR); - int num_active_planes = hweight32(active_planes); + int num_active_planes = hweight8(active_planes); const int fifo_size = 511; int fifo_extra, fifo_left = fifo_size; int sprite0_fifo_extra = 0; @@ -1848,8 +1848,8 @@ static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state) struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal; const struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state; - int num_active_planes = hweight32(crtc_state->active_planes & - ~BIT(PLANE_CURSOR)); + int num_active_planes = hweight8(crtc_state->active_planes & + ~BIT(PLANE_CURSOR)); bool needs_modeset = drm_atomic_crtc_needs_modeset(&crtc_state->base); const struct intel_plane_state *old_plane_state; const struct intel_plane_state *new_plane_state; @@ -3761,14 +3761,14 @@ bool intel_can_enable_sagv(struct intel_atomic_state *state) /* * If there are no active CRTCs, no additional checks need be performed */ - if (hweight32(state->active_pipes) == 0) + if (hweight8(state->active_pipes) == 0) return true; /* * SKL+ workaround: bspec recommends we disable SAGV when we have * more then one pipe enabled */ - if (hweight32(state->active_pipes) > 1) + if (hweight8(state->active_pipes) > 1) return false; /* Since we're now guaranteed to only have one active CRTC... */ @@ -3867,14 +3867,14 @@ skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv, if (WARN_ON(!state) || !crtc_state->base.active) { alloc->start = 0; alloc->end = 0; - *num_active = hweight32(dev_priv->active_pipes); + *num_active = hweight8(dev_priv->active_pipes); return; } if (intel_state->active_pipe_changes) - *num_active = hweight32(intel_state->active_pipes); + *num_active = hweight8(intel_state->active_pipes); else - *num_active = hweight32(dev_priv->active_pipes); + *num_active = hweight8(dev_priv->active_pipes); ddb_size = intel_get_ddb_size(dev_priv, crtc_state, total_data_rate, *num_active, ddb); From 6dcb85a0ad990455ae7c596e3fc966ad9c1ba9c5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 23 Aug 2019 14:26:46 +0100 Subject: [PATCH 024/331] drm/i915: Hold irq-off for the entire fake lock period Sadly lockdep records when the irqs are re-enabled and then marks up the fake lock as being irq-unsafe. Our hand is forced and so we must mark up the entire fake lock critical section as irq-off. Hopefully this is the last tweak required! v2: Not quite, we need to mark the timeline spinlock as irqsafe. That was a genuine bug being hidden by the earlier lockdep splat. Fixes: d67739268cf0 ("drm/i915/gt: Mark up the nested engine-pm timeline lock as irqsafe") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190823132700.25286-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 28 +++++++++++++++-------- drivers/gpu/drm/i915/gt/intel_reset.c | 9 ++++---- drivers/gpu/drm/i915/gt/intel_timeline.c | 10 ++++---- drivers/gpu/drm/i915/i915_gem.c | 15 ++++++------ drivers/gpu/drm/i915/i915_request.c | 9 ++++---- 5 files changed, 42 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index a372d4ea9370a..65b5ca74b3947 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -39,27 +39,32 @@ static int __engine_unpark(struct intel_wakeref *wf) #if IS_ENABLED(CONFIG_LOCKDEP) -static inline void __timeline_mark_lock(struct intel_context *ce) +static inline unsigned long __timeline_mark_lock(struct intel_context *ce) { unsigned long flags; local_irq_save(flags); mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_); - local_irq_restore(flags); + + return flags; } -static inline void __timeline_mark_unlock(struct intel_context *ce) +static inline void __timeline_mark_unlock(struct intel_context *ce, + unsigned long flags) { mutex_release(&ce->timeline->mutex.dep_map, 0, _THIS_IP_); + local_irq_restore(flags); } #else -static inline void __timeline_mark_lock(struct intel_context *ce) +static inline unsigned long __timeline_mark_lock(struct intel_context *ce) { + return 0; } -static inline void __timeline_mark_unlock(struct intel_context *ce) +static inline void __timeline_mark_unlock(struct intel_context *ce, + unsigned long flags) { } @@ -68,6 +73,8 @@ static inline void __timeline_mark_unlock(struct intel_context *ce) static bool switch_to_kernel_context(struct intel_engine_cs *engine) { struct i915_request *rq; + unsigned long flags; + bool result = true; /* Already inside the kernel context, safe to power down. */ if (engine->wakeref_serial == engine->serial) @@ -89,12 +96,12 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) * retiring the last request, thus all rings should be empty and * all timelines idle. */ - __timeline_mark_lock(engine->kernel_context); + flags = __timeline_mark_lock(engine->kernel_context); rq = __i915_request_create(engine->kernel_context, GFP_NOWAIT); if (IS_ERR(rq)) /* Context switch failed, hope for the best! Maybe reset? */ - return true; + goto out_unlock; intel_timeline_enter(rq->timeline); @@ -110,9 +117,10 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) __intel_wakeref_defer_park(&engine->wakeref); __i915_request_queue(rq, NULL); - __timeline_mark_unlock(engine->kernel_context); - - return false; + result = false; +out_unlock: + __timeline_mark_unlock(engine->kernel_context, flags); + return result; } static int __engine_park(struct intel_wakeref *wf) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 077716442c90a..b9d84d52e9864 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -792,6 +792,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) { struct intel_gt_timelines *timelines = >->timelines; struct intel_timeline *tl; + unsigned long flags; if (!test_bit(I915_WEDGED, >->reset.flags)) return true; @@ -811,7 +812,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) * * No more can be submitted until we reset the wedged bit. */ - spin_lock(&timelines->lock); + spin_lock_irqsave(&timelines->lock, flags); list_for_each_entry(tl, &timelines->active_list, link) { struct i915_request *rq; @@ -819,7 +820,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) if (!rq) continue; - spin_unlock(&timelines->lock); + spin_unlock_irqrestore(&timelines->lock, flags); /* * All internal dependencies (i915_requests) will have @@ -832,10 +833,10 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) i915_request_put(rq); /* Restart iteration after droping lock */ - spin_lock(&timelines->lock); + spin_lock_irqsave(&timelines->lock, flags); tl = list_entry(&timelines->active_list, typeof(*tl), link); } - spin_unlock(&timelines->lock); + spin_unlock_irqrestore(&timelines->lock, flags); intel_gt_sanitize(gt, false); diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 02fbe11b671be..9cb01d9828f1d 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -337,6 +337,7 @@ int intel_timeline_pin(struct intel_timeline *tl) void intel_timeline_enter(struct intel_timeline *tl) { struct intel_gt_timelines *timelines = &tl->gt->timelines; + unsigned long flags; lockdep_assert_held(&tl->mutex); @@ -345,14 +346,15 @@ void intel_timeline_enter(struct intel_timeline *tl) return; GEM_BUG_ON(!tl->active_count); /* overflow? */ - spin_lock(&timelines->lock); + spin_lock_irqsave(&timelines->lock, flags); list_add(&tl->link, &timelines->active_list); - spin_unlock(&timelines->lock); + spin_unlock_irqrestore(&timelines->lock, flags); } void intel_timeline_exit(struct intel_timeline *tl) { struct intel_gt_timelines *timelines = &tl->gt->timelines; + unsigned long flags; lockdep_assert_held(&tl->mutex); @@ -360,9 +362,9 @@ void intel_timeline_exit(struct intel_timeline *tl) if (--tl->active_count) return; - spin_lock(&timelines->lock); + spin_lock_irqsave(&timelines->lock, flags); list_del(&tl->link); - spin_unlock(&timelines->lock); + spin_unlock_irqrestore(&timelines->lock, flags); /* * Since this timeline is idle, all bariers upon which we were waiting diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index eb31b69a316a9..ec9a46c276de4 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -889,12 +889,13 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915) static long wait_for_timelines(struct drm_i915_private *i915, - unsigned int flags, long timeout) + unsigned int wait, long timeout) { struct intel_gt_timelines *timelines = &i915->gt.timelines; struct intel_timeline *tl; + unsigned long flags; - spin_lock(&timelines->lock); + spin_lock_irqsave(&timelines->lock, flags); list_for_each_entry(tl, &timelines->active_list, link) { struct i915_request *rq; @@ -902,7 +903,7 @@ wait_for_timelines(struct drm_i915_private *i915, if (!rq) continue; - spin_unlock(&timelines->lock); + spin_unlock_irqrestore(&timelines->lock, flags); /* * "Race-to-idle". @@ -913,19 +914,19 @@ wait_for_timelines(struct drm_i915_private *i915, * want to complete as quickly as possible to avoid prolonged * stalls, so allow the gpu to boost to maximum clocks. */ - if (flags & I915_WAIT_FOR_IDLE_BOOST) + if (wait & I915_WAIT_FOR_IDLE_BOOST) gen6_rps_boost(rq); - timeout = i915_request_wait(rq, flags, timeout); + timeout = i915_request_wait(rq, wait, timeout); i915_request_put(rq); if (timeout < 0) return timeout; /* restart after reacquiring the lock */ - spin_lock(&timelines->lock); + spin_lock_irqsave(&timelines->lock, flags); tl = list_entry(&timelines->active_list, typeof(*tl), link); } - spin_unlock(&timelines->lock); + spin_unlock_irqrestore(&timelines->lock, flags); return timeout; } diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index f1a0a57fc6fc8..a53777dd371c3 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1465,9 +1465,10 @@ bool i915_retire_requests(struct drm_i915_private *i915) { struct intel_gt_timelines *timelines = &i915->gt.timelines; struct intel_timeline *tl, *tn; + unsigned long flags; LIST_HEAD(free); - spin_lock(&timelines->lock); + spin_lock_irqsave(&timelines->lock, flags); list_for_each_entry_safe(tl, tn, &timelines->active_list, link) { if (!mutex_trylock(&tl->mutex)) continue; @@ -1475,11 +1476,11 @@ bool i915_retire_requests(struct drm_i915_private *i915) intel_timeline_get(tl); GEM_BUG_ON(!tl->active_count); tl->active_count++; /* pin the list element */ - spin_unlock(&timelines->lock); + spin_unlock_irqrestore(&timelines->lock, flags); retire_requests(tl); - spin_lock(&timelines->lock); + spin_lock_irqsave(&timelines->lock, flags); /* Resume iteration after dropping lock */ list_safe_reset_next(tl, tn, link); @@ -1494,7 +1495,7 @@ bool i915_retire_requests(struct drm_i915_private *i915) list_add(&tl->link, &free); } } - spin_unlock(&timelines->lock); + spin_unlock_irqrestore(&timelines->lock, flags); list_for_each_entry_safe(tl, tn, &free, link) __intel_timeline_free(&tl->kref); From 191797a892c91aec6cdffc4e05696b722d779fe3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 23 Aug 2019 15:14:21 +0100 Subject: [PATCH 025/331] drm/i915/gtt: Preallocate Braswell top-level page directory In order for the Braswell top-level PD to remain the same from the time of request construction to its submission onto HW, as we may be asynchronously rewriting the page tables (thus changing the expected register state after having already stored the old addresses in the request), the top level PD must be preallocated. So wave goodbye to our lazy allocation of those 4x2 pages. v2: A little bit of write-flushing required (presumably it always has been required, but now we are more susceptible and it is showing up!) v3: Put back the forced-PD-reload on every batch, we can't survive without it and explicitly marking the context for PD reload makes Braswell turn nasty. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190823141421.2398-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 8 +++++++- drivers/gpu/drm/i915/i915_gem_gtt.c | 10 +++++----- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 1cdfe05514c3f..1f735ca9b1736 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1003,12 +1003,18 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data) intel_ring_advance(rq, cs); } else if (HAS_LOGICAL_RING_CONTEXTS(engine->i915)) { struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); + int err; + + /* Magic required to prevent forcewake errors! */ + err = engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + return err; cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2); if (IS_ERR(cs)) return PTR_ERR(cs); - *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES); + *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED; for (i = GEN8_3LVL_PDPES; i--; ) { const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 2a425db1cfd89..e0e9b9b52544d 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -168,6 +168,7 @@ static int ppgtt_bind_vma(struct i915_vma *vma, pte_flags |= PTE_READ_ONLY; vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); + wmb(); return 0; } @@ -1426,6 +1427,7 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt) set_pd_entry(pd, idx, pde); atomic_inc(px_used(pde)); /* keep pinned */ } + wmb(); return 0; } @@ -1513,11 +1515,9 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915) } if (!i915_vm_is_4lvl(&ppgtt->vm)) { - if (intel_vgpu_active(i915)) { - err = gen8_preallocate_top_level_pdp(ppgtt); - if (err) - goto err_free_pd; - } + err = gen8_preallocate_top_level_pdp(ppgtt); + if (err) + goto err_free_pd; } ppgtt->vm.insert_entries = gen8_ppgtt_insert; From 636e83f2f208555c3d19d8b454ebdd8d8f4652cc Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 23 Aug 2019 16:39:44 +0100 Subject: [PATCH 026/331] drm/i915: Flush the existing fence before GGTT read/write Our fence management is lazy, very lazy. If the user marks an object as untiled, we do not immediately flush the fence but merely mark it as dirty. On the next use we have to remember to check and remove the fence, by which time we hope it is idle and we do not have to wait. v2: Throw away the old fence on the next ggtt_pin. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111468 Fixes: 1f7fd484fff1 ("drm/i915: Replace i915_vma_put_fence()") Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20190823153944.20630-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ec9a46c276de4..95e7c52cf8edc 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1027,6 +1027,14 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, return ERR_PTR(ret); } + if (vma->fence && !i915_gem_object_is_tiled(obj)) { + mutex_lock(&vma->vm->mutex); + ret = i915_vma_revoke_fence(vma); + mutex_unlock(&vma->vm->mutex); + if (ret) + return ERR_PTR(ret); + } + ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); if (ret) return ERR_PTR(ret); From 936ad29de8129747bbfeec8a85e38c8a830bd5da Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Mon, 19 Aug 2019 18:23:27 -0700 Subject: [PATCH 027/331] drm/i915/uc: define GuC and HuC FWs for EHL First uc firmware release for EHL. Signed-off-by: Daniele Ceraolo Spurio Cc: Matt Roper Cc: Anusha Srivatsa Cc: Michal Wajdeczko Reviewed-by: Stuart Summers Tested-by: Matt Roper Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20190820012327.36443-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index bd22bf11adadd..296a82603be0a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -39,12 +39,13 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, * Must be ordered based on platform + revid, from newer to older. */ #define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \ - fw_def(ICELAKE, 0, guc_def(icl, 33, 0, 0), huc_def(icl, 8, 4, 3238)) \ - fw_def(COFFEELAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 02, 00, 1810)) \ - fw_def(GEMINILAKE, 0, guc_def(glk, 33, 0, 0), huc_def(glk, 03, 01, 2893)) \ - fw_def(KABYLAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 02, 00, 1810)) \ - fw_def(BROXTON, 0, guc_def(bxt, 33, 0, 0), huc_def(bxt, 01, 8, 2893)) \ - fw_def(SKYLAKE, 0, guc_def(skl, 33, 0, 0), huc_def(skl, 01, 07, 1398)) + fw_def(ELKHARTLAKE, 0, guc_def(ehl, 33, 0, 4), huc_def(ehl, 9, 0, 0)) \ + fw_def(ICELAKE, 0, guc_def(icl, 33, 0, 0), huc_def(icl, 8, 4, 3238)) \ + fw_def(COFFEELAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 02, 00, 1810)) \ + fw_def(GEMINILAKE, 0, guc_def(glk, 33, 0, 0), huc_def(glk, 03, 01, 2893)) \ + fw_def(KABYLAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 02, 00, 1810)) \ + fw_def(BROXTON, 0, guc_def(bxt, 33, 0, 0), huc_def(bxt, 01, 8, 2893)) \ + fw_def(SKYLAKE, 0, guc_def(skl, 33, 0, 0), huc_def(skl, 01, 07, 1398)) #define __MAKE_UC_FW_PATH(prefix_, name_, separator_, major_, minor_, patch_) \ "i915/" \ From 77715906921251bd9f75bcf4825f176df3f91208 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 23 Aug 2019 19:14:55 +0100 Subject: [PATCH 028/331] drm/i915: Keep drm_i915_file_private around under RCU Ensure that the drm_i915_file_private continues to exist as we attempt to remove a request from its list, which may race with the destruction of the file. <6> [38.380714] [IGT] gem_ctx_create: starting subtest basic-files <0> [42.201329] BUG: spinlock bad magic on CPU#0, kworker/u16:0/7 <4> [42.201356] general protection fault: 0000 [#1] PREEMPT SMP PTI <4> [42.201371] CPU: 0 PID: 7 Comm: kworker/u16:0 Tainted: G U 5.3.0-rc5-CI-Patchwork_14169+ #1 <4> [42.201391] Hardware name: Dell Inc. OptiPlex 745 /0GW726, BIOS 2.3.1 05/21/2007 <4> [42.201594] Workqueue: i915 retire_work_handler [i915] <4> [42.201614] RIP: 0010:spin_dump+0x5a/0x90 <4> [42.201625] Code: 00 48 8d 88 c0 06 00 00 48 c7 c7 00 71 09 82 e8 35 ef 00 00 48 85 db 44 8b 4d 08 41 b8 ff ff ff ff 48 c7 c1 0b cd 0f 82 74 0e <44> 8b 83 e0 04 00 00 48 8d 8b c0 06 00 00 8b 55 04 48 89 ee 48 c7 <4> [42.201660] RSP: 0018:ffffc9000004bd80 EFLAGS: 00010202 <4> [42.201673] RAX: 0000000000000031 RBX: 6b6b6b6b6b6b6b6b RCX: ffffffff820fcd0b <4> [42.201688] RDX: 0000000000000000 RSI: ffff88803de266f8 RDI: 00000000ffffffff <4> [42.201703] RBP: ffff888038381ff8 R08: 00000000ffffffff R09: 000000006b6b6b6b <4> [42.201718] R10: 0000000041cb0b89 R11: 646162206b636f6c R12: ffff88802a618500 <4> [42.201733] R13: ffff88802b32c288 R14: ffff888038381ff8 R15: ffff88802b32c250 <4> [42.201748] FS: 0000000000000000(0000) GS:ffff88803de00000(0000) knlGS:0000000000000000 <4> [42.201765] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4> [42.201778] CR2: 00007f2cefc6d180 CR3: 00000000381ee000 CR4: 00000000000006f0 <4> [42.201793] Call Trace: <4> [42.201805] do_raw_spin_lock+0x66/0xb0 <4> [42.201898] i915_request_retire+0x548/0x7c0 [i915] <4> [42.201989] retire_requests+0x4d/0x60 [i915] <4> [42.202078] i915_retire_requests+0x144/0x2e0 [i915] <4> [42.202169] retire_work_handler+0x10/0x40 [i915] Recently, in commit 44c22f3f1a0a ("drm/i915: Serialize insertion into the file->mm.request_list"), we fixed a race on insertion. Now, it appears we also have a race with destruction! Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20190823181455.31910-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_throttle.c | 4 +--- drivers/gpu/drm/i915/i915_drv.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 6 +++++- drivers/gpu/drm/i915/i915_request.c | 13 +++++++------ 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c index 1e372420771be..540ef0551789f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c @@ -50,10 +50,8 @@ i915_gem_throttle_ioctl(struct drm_device *dev, void *data, if (time_after_eq(request->emitted_jiffies, recent_enough)) break; - if (target) { + if (target && xchg(&target->file_priv, NULL)) list_del(&target->client_link); - target->file_priv = NULL; - } target = request; } diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index b5b2a64753e6e..723b9b7202b06 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1730,7 +1730,7 @@ static void i915_driver_postclose(struct drm_device *dev, struct drm_file *file) i915_gem_release(dev, file); mutex_unlock(&dev->struct_mutex); - kfree(file_priv); + kfree_rcu(file_priv, rcu); /* Catch up with all the deferred frees from "this" client */ i915_gem_flush_free_objects(to_i915(dev)); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ba3f6d82ef26a..f5e39a3bfff6a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -185,7 +185,11 @@ struct i915_mmu_object; struct drm_i915_file_private { struct drm_i915_private *dev_priv; - struct drm_file *file; + + union { + struct drm_file *file; + struct rcu_head rcu; + }; struct { spinlock_t lock; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index a53777dd371c3..18865ce04e130 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -169,16 +169,17 @@ remove_from_client(struct i915_request *request) { struct drm_i915_file_private *file_priv; - file_priv = READ_ONCE(request->file_priv); - if (!file_priv) + if (!READ_ONCE(request->file_priv)) return; - spin_lock(&file_priv->mm.lock); - if (request->file_priv) { + rcu_read_lock(); + file_priv = xchg(&request->file_priv, NULL); + if (file_priv) { + spin_lock(&file_priv->mm.lock); list_del(&request->client_link); - request->file_priv = NULL; + spin_unlock(&file_priv->mm.lock); } - spin_unlock(&file_priv->mm.lock); + rcu_read_unlock(); } static void free_capture_list(struct i915_request *request) From 75b974a859e5d9a3ceaa3aa03bbc27d404b32231 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 24 Aug 2019 00:51:41 +0100 Subject: [PATCH 029/331] drm/i915/selftests: Teach igt_gpu_fill_dw() to take intel_context Avoid having to pass around (ctx, engine) everywhere by passing the actual intel_context we intend to use. Today we preach this lesson to igt_gpu_fill_dw and its callers' callers. The immediate benefit for the GEM selftests is that we aim to use the GEM context as the control, the source of the engines on which to test the GEM context. Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20190823235141.31799-1-chris@chris-wilson.co.uk --- .../gpu/drm/i915/gem/selftests/huge_pages.c | 103 ++++++++++-------- .../drm/i915/gem/selftests/i915_gem_context.c | 70 +++++++----- .../drm/i915/gem/selftests/igt_gem_utils.c | 26 ++--- .../drm/i915/gem/selftests/igt_gem_utils.h | 13 +-- 4 files changed, 116 insertions(+), 96 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 8de83c6d81f54..c5cea43792163 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -879,9 +879,8 @@ static int igt_mock_ppgtt_64K(void *arg) return err; } -static int gpu_write(struct i915_vma *vma, - struct i915_gem_context *ctx, - struct intel_engine_cs *engine, +static int gpu_write(struct intel_context *ce, + struct i915_vma *vma, u32 dw, u32 val) { @@ -893,7 +892,7 @@ static int gpu_write(struct i915_vma *vma, if (err) return err; - return igt_gpu_fill_dw(vma, ctx, engine, dw * sizeof(u32), + return igt_gpu_fill_dw(ce, vma, dw * sizeof(u32), vma->size >> PAGE_SHIFT, val); } @@ -929,18 +928,16 @@ static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val) return err; } -static int __igt_write_huge(struct i915_gem_context *ctx, - struct intel_engine_cs *engine, +static int __igt_write_huge(struct intel_context *ce, struct drm_i915_gem_object *obj, u64 size, u64 offset, u32 dword, u32 val) { - struct i915_address_space *vm = ctx->vm ?: &engine->gt->ggtt->vm; unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; struct i915_vma *vma; int err; - vma = i915_vma_instance(obj, vm, NULL); + vma = i915_vma_instance(obj, ce->vm, NULL); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -954,7 +951,7 @@ static int __igt_write_huge(struct i915_gem_context *ctx, * The ggtt may have some pages reserved so * refrain from erroring out. */ - if (err == -ENOSPC && i915_is_ggtt(vm)) + if (err == -ENOSPC && i915_is_ggtt(ce->vm)) err = 0; goto out_vma_close; @@ -964,7 +961,7 @@ static int __igt_write_huge(struct i915_gem_context *ctx, if (err) goto out_vma_unpin; - err = gpu_write(vma, ctx, engine, dword, val); + err = gpu_write(ce, vma, dword, val); if (err) { pr_err("gpu-write failed at offset=%llx\n", offset); goto out_vma_unpin; @@ -987,14 +984,13 @@ static int __igt_write_huge(struct i915_gem_context *ctx, static int igt_write_huge(struct i915_gem_context *ctx, struct drm_i915_gem_object *obj) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; - static struct intel_engine_cs *engines[I915_NUM_ENGINES]; - struct intel_engine_cs *engine; + struct i915_gem_engines *engines; + struct i915_gem_engines_iter it; + struct intel_context *ce; I915_RND_STATE(prng); IGT_TIMEOUT(end_time); unsigned int max_page_size; - unsigned int id; + unsigned int count; u64 max; u64 num; u64 size; @@ -1008,19 +1004,18 @@ static int igt_write_huge(struct i915_gem_context *ctx, if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) size = round_up(size, I915_GTT_PAGE_SIZE_2M); - max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg); - max = div_u64((vm->total - size), max_page_size); - n = 0; - for_each_engine(engine, i915, id) { - if (!intel_engine_can_store_dword(engine)) { - pr_info("store-dword-imm not supported on engine=%u\n", - id); + count = 0; + max = U64_MAX; + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + count++; + if (!intel_engine_can_store_dword(ce->engine)) continue; - } - engines[n++] = engine; - } + max = min(max, ce->vm->total); + n++; + } + i915_gem_context_unlock_engines(ctx); if (!n) return 0; @@ -1029,23 +1024,30 @@ static int igt_write_huge(struct i915_gem_context *ctx, * randomized order, lets also make feeding to the same engine a few * times in succession a possibility by enlarging the permutation array. */ - order = i915_random_order(n * I915_NUM_ENGINES, &prng); + order = i915_random_order(count * count, &prng); if (!order) return -ENOMEM; + max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg); + max = div_u64(max - size, max_page_size); + /* * Try various offsets in an ascending/descending fashion until we * timeout -- we want to avoid issues hidden by effectively always using * offset = 0. */ i = 0; + engines = i915_gem_context_lock_engines(ctx); for_each_prime_number_from(num, 0, max) { u64 offset_low = num * max_page_size; u64 offset_high = (max - num) * max_page_size; u32 dword = offset_in_page(num) / 4; + struct intel_context *ce; - engine = engines[order[i] % n]; - i = (i + 1) % (n * I915_NUM_ENGINES); + ce = engines->engines[order[i] % engines->num_engines]; + i = (i + 1) % (count * count); + if (!ce || !intel_engine_can_store_dword(ce->engine)) + continue; /* * In order to utilize 64K pages we need to both pad the vma @@ -1057,22 +1059,23 @@ static int igt_write_huge(struct i915_gem_context *ctx, offset_low = round_down(offset_low, I915_GTT_PAGE_SIZE_2M); - err = __igt_write_huge(ctx, engine, obj, size, offset_low, + err = __igt_write_huge(ce, obj, size, offset_low, dword, num + 1); if (err) break; - err = __igt_write_huge(ctx, engine, obj, size, offset_high, + err = __igt_write_huge(ce, obj, size, offset_high, dword, num + 1); if (err) break; if (igt_timeout(end_time, - "%s timed out on engine=%u, offset_low=%llx offset_high=%llx, max_page_size=%x\n", - __func__, engine->id, offset_low, offset_high, + "%s timed out on %s, offset_low=%llx offset_high=%llx, max_page_size=%x\n", + __func__, ce->engine->name, offset_low, offset_high, max_page_size)) break; } + i915_gem_context_unlock_engines(ctx); kfree(order); @@ -1316,10 +1319,10 @@ static int igt_ppgtt_pin_update(void *arg) unsigned long supported = INTEL_INFO(dev_priv)->page_sizes; struct i915_address_space *vm = ctx->vm; struct drm_i915_gem_object *obj; + struct i915_gem_engines_iter it; + struct intel_context *ce; struct i915_vma *vma; unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; - struct intel_engine_cs *engine; - enum intel_engine_id id; unsigned int n; int first, last; int err; @@ -1419,14 +1422,18 @@ static int igt_ppgtt_pin_update(void *arg) */ n = 0; - for_each_engine(engine, dev_priv, id) { - if (!intel_engine_can_store_dword(engine)) + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + if (!intel_engine_can_store_dword(ce->engine)) continue; - err = gpu_write(vma, ctx, engine, n++, 0xdeadbeaf); + err = gpu_write(ce, vma, n++, 0xdeadbeaf); if (err) - goto out_unpin; + break; } + i915_gem_context_unlock_engines(ctx); + if (err) + goto out_unpin; + while (n--) { err = cpu_check(obj, n, 0xdeadbeaf); if (err) @@ -1507,8 +1514,8 @@ static int igt_shrink_thp(void *arg) struct drm_i915_private *i915 = ctx->i915; struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; struct drm_i915_gem_object *obj; - struct intel_engine_cs *engine; - enum intel_engine_id id; + struct i915_gem_engines_iter it; + struct intel_context *ce; struct i915_vma *vma; unsigned int flags = PIN_USER; unsigned int n; @@ -1548,16 +1555,19 @@ static int igt_shrink_thp(void *arg) goto out_unpin; n = 0; - for_each_engine(engine, i915, id) { - if (!intel_engine_can_store_dword(engine)) + + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + if (!intel_engine_can_store_dword(ce->engine)) continue; - err = gpu_write(vma, ctx, engine, n++, 0xdeadbeaf); + err = gpu_write(ce, vma, n++, 0xdeadbeaf); if (err) - goto out_unpin; + break; } - + i915_gem_context_unlock_engines(ctx); i915_vma_unpin(vma); + if (err) + goto out_close; /* * Now that the pages are *unpinned* shrink-all should invoke @@ -1583,10 +1593,9 @@ static int igt_shrink_thp(void *arg) while (n--) { err = cpu_check(obj, n, 0xdeadbeaf); if (err) - goto out_unpin; + break; } - out_unpin: i915_vma_unpin(vma); out_close: diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 3e6f4a65d3560..3adb60c2fd1f6 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -166,19 +166,17 @@ static unsigned long fake_page_count(struct drm_i915_gem_object *obj) return huge_gem_object_dma_size(obj) >> PAGE_SHIFT; } -static int gpu_fill(struct drm_i915_gem_object *obj, - struct i915_gem_context *ctx, - struct intel_engine_cs *engine, +static int gpu_fill(struct intel_context *ce, + struct drm_i915_gem_object *obj, unsigned int dw) { - struct i915_address_space *vm = ctx->vm ?: &engine->gt->ggtt->vm; struct i915_vma *vma; int err; - GEM_BUG_ON(obj->base.size > vm->total); - GEM_BUG_ON(!intel_engine_can_store_dword(engine)); + GEM_BUG_ON(obj->base.size > ce->vm->total); + GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine)); - vma = i915_vma_instance(obj, vm, NULL); + vma = i915_vma_instance(obj, ce->vm, NULL); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -200,9 +198,7 @@ static int gpu_fill(struct drm_i915_gem_object *obj, * whilst checking that each context provides a unique view * into the object. */ - err = igt_gpu_fill_dw(vma, - ctx, - engine, + err = igt_gpu_fill_dw(ce, vma, (dw * real_page_count(obj)) << PAGE_SHIFT | (dw * sizeof(u32)), real_page_count(obj), @@ -305,22 +301,21 @@ static int file_add_object(struct drm_file *file, } static struct drm_i915_gem_object * -create_test_object(struct i915_gem_context *ctx, +create_test_object(struct i915_address_space *vm, struct drm_file *file, struct list_head *objects) { struct drm_i915_gem_object *obj; - struct i915_address_space *vm = ctx->vm ?: &ctx->i915->ggtt.vm; u64 size; int err; /* Keep in GEM's good graces */ - i915_retire_requests(ctx->i915); + i915_retire_requests(vm->i915); size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE); size = round_down(size, DW_PER_PAGE * PAGE_SIZE); - obj = huge_gem_object(ctx->i915, DW_PER_PAGE * PAGE_SIZE, size); + obj = huge_gem_object(vm->i915, DW_PER_PAGE * PAGE_SIZE, size); if (IS_ERR(obj)) return obj; @@ -393,6 +388,7 @@ static int igt_ctx_exec(void *arg) dw = 0; while (!time_after(jiffies, end_time)) { struct i915_gem_context *ctx; + struct intel_context *ce; ctx = live_context(i915, file); if (IS_ERR(ctx)) { @@ -400,15 +396,20 @@ static int igt_ctx_exec(void *arg) goto out_unlock; } + ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); + if (!obj) { - obj = create_test_object(ctx, file, &objects); + obj = create_test_object(ce->vm, file, &objects); if (IS_ERR(obj)) { err = PTR_ERR(obj); + intel_context_put(ce); goto out_unlock; } } - err = gpu_fill(obj, ctx, engine, dw); + err = gpu_fill(ce, obj, dw); + intel_context_put(ce); + if (err) { pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), @@ -509,6 +510,7 @@ static int igt_shared_ctx_exec(void *arg) ncontexts = 0; while (!time_after(jiffies, end_time)) { struct i915_gem_context *ctx; + struct intel_context *ce; ctx = kernel_context(i915); if (IS_ERR(ctx)) { @@ -518,22 +520,26 @@ static int igt_shared_ctx_exec(void *arg) __assign_ppgtt(ctx, parent->vm); + ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); if (!obj) { - obj = create_test_object(parent, file, &objects); + obj = create_test_object(parent->vm, file, &objects); if (IS_ERR(obj)) { err = PTR_ERR(obj); + intel_context_put(ce); kernel_context_close(ctx); goto out_test; } } - err = gpu_fill(obj, ctx, engine, dw); + err = gpu_fill(ce, obj, dw); + intel_context_put(ce); + kernel_context_close(ctx); + if (err) { pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), engine->name, ctx->hw_id, yesno(!!ctx->vm), err); - kernel_context_close(ctx); goto out_test; } @@ -544,8 +550,6 @@ static int igt_shared_ctx_exec(void *arg) ndwords++; ncontexts++; - - kernel_context_close(ctx); } pr_info("Submitted %lu contexts to %s, filling %lu dwords\n", ncontexts, engine->name, ndwords); @@ -604,6 +608,8 @@ static struct i915_vma *rpcs_query_batch(struct i915_vma *vma) __i915_gem_object_flush_map(obj, 0, 64); i915_gem_object_unpin_map(obj); + intel_gt_chipset_flush(vma->vm->gt); + vma = i915_vma_instance(obj, vma->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); @@ -1082,17 +1088,19 @@ static int igt_ctx_readonly(void *arg) ndwords = 0; dw = 0; while (!time_after(jiffies, end_time)) { - struct intel_engine_cs *engine; - unsigned int id; + struct i915_gem_engines_iter it; + struct intel_context *ce; - for_each_engine(engine, i915, id) { - if (!intel_engine_can_store_dword(engine)) + for_each_gem_engine(ce, + i915_gem_context_lock_engines(ctx), it) { + if (!intel_engine_can_store_dword(ce->engine)) continue; if (!obj) { - obj = create_test_object(ctx, file, &objects); + obj = create_test_object(ce->vm, file, &objects); if (IS_ERR(obj)) { err = PTR_ERR(obj); + i915_gem_context_unlock_engines(ctx); goto out_unlock; } @@ -1100,12 +1108,13 @@ static int igt_ctx_readonly(void *arg) i915_gem_object_set_readonly(obj); } - err = gpu_fill(obj, ctx, engine, dw); + err = gpu_fill(ce, obj, dw); if (err) { pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), - engine->name, ctx->hw_id, + ce->engine->name, ctx->hw_id, yesno(!!ctx->vm), err); + i915_gem_context_unlock_engines(ctx); goto out_unlock; } @@ -1115,6 +1124,7 @@ static int igt_ctx_readonly(void *arg) } ndwords++; } + i915_gem_context_unlock_engines(ctx); } pr_info("Submitted %lu dwords (across %u engines)\n", ndwords, RUNTIME_INFO(i915)->num_engines); @@ -1197,6 +1207,8 @@ static int write_to_scratch(struct i915_gem_context *ctx, __i915_gem_object_flush_map(obj, 0, 64); i915_gem_object_unpin_map(obj); + intel_gt_chipset_flush(engine->gt); + vma = i915_vma_instance(obj, ctx->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); @@ -1296,6 +1308,8 @@ static int read_from_scratch(struct i915_gem_context *ctx, i915_gem_object_flush_map(obj); i915_gem_object_unpin_map(obj); + intel_gt_chipset_flush(engine->gt); + vma = i915_vma_instance(obj, ctx->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c index 57ece53c1075a..ee5dc13a30b33 100644 --- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c +++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c @@ -9,6 +9,7 @@ #include "gem/i915_gem_context.h" #include "gem/i915_gem_pm.h" #include "gt/intel_context.h" +#include "gt/intel_gt.h" #include "i915_vma.h" #include "i915_drv.h" @@ -84,6 +85,8 @@ igt_emit_store_dw(struct i915_vma *vma, *cmd = MI_BATCH_BUFFER_END; i915_gem_object_unpin_map(obj); + intel_gt_chipset_flush(vma->vm->gt); + vma = i915_vma_instance(obj, vma->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); @@ -101,40 +104,35 @@ igt_emit_store_dw(struct i915_vma *vma, return ERR_PTR(err); } -int igt_gpu_fill_dw(struct i915_vma *vma, - struct i915_gem_context *ctx, - struct intel_engine_cs *engine, - u64 offset, - unsigned long count, - u32 val) +int igt_gpu_fill_dw(struct intel_context *ce, + struct i915_vma *vma, u64 offset, + unsigned long count, u32 val) { - struct i915_address_space *vm = ctx->vm ?: &engine->gt->ggtt->vm; struct i915_request *rq; struct i915_vma *batch; unsigned int flags; int err; - GEM_BUG_ON(vma->size > vm->total); - GEM_BUG_ON(!intel_engine_can_store_dword(engine)); + GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine)); GEM_BUG_ON(!i915_vma_is_pinned(vma)); batch = igt_emit_store_dw(vma, offset, count, val); if (IS_ERR(batch)) return PTR_ERR(batch); - rq = igt_request_alloc(ctx, engine); + rq = intel_context_create_request(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_batch; } flags = 0; - if (INTEL_GEN(vm->i915) <= 5) + if (INTEL_GEN(ce->vm->i915) <= 5) flags |= I915_DISPATCH_SECURE; - err = engine->emit_bb_start(rq, - batch->node.start, batch->node.size, - flags); + err = rq->engine->emit_bb_start(rq, + batch->node.start, batch->node.size, + flags); if (err) goto err_request; diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h index 361a7ef866b07..4221cf84d1756 100644 --- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h +++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h @@ -11,9 +11,11 @@ struct i915_request; struct i915_gem_context; -struct intel_engine_cs; struct i915_vma; +struct intel_context; +struct intel_engine_cs; + struct i915_request * igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine); @@ -23,11 +25,8 @@ igt_emit_store_dw(struct i915_vma *vma, unsigned long count, u32 val); -int igt_gpu_fill_dw(struct i915_vma *vma, - struct i915_gem_context *ctx, - struct intel_engine_cs *engine, - u64 offset, - unsigned long count, - u32 val); +int igt_gpu_fill_dw(struct intel_context *ce, + struct i915_vma *vma, u64 offset, + unsigned long count, u32 val); #endif /* __IGT_GEM_UTILS_H__ */ From 52988009843160c5b366b4082ed6df48041c655c Mon Sep 17 00:00:00 2001 From: Xiaolin Zhang Date: Fri, 23 Aug 2019 14:57:31 +0800 Subject: [PATCH 030/331] drm/i915: to make vgpu ppgtt notificaiton as atomic operation vgpu ppgtt notification was split into 2 steps, the first step is to update PVINFO's pdp register and then write PVINFO's g2v_notify register with action code to tirgger ppgtt notification to GVT side. currently these steps were not atomic operations due to no any protection, so it is easy to enter race condition state during the MTBF, stress and IGT test to cause GPU hang. the solution is to add a lock to make vgpu ppgtt notication as atomic operation. Cc: stable@vger.kernel.org Signed-off-by: Xiaolin Zhang Acked-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/1566543451-13955-1-git-send-email-xiaolin.zhang@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem_gtt.c | 12 +++++++----- drivers/gpu/drm/i915/i915_vgpu.c | 1 + 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f5e39a3bfff6a..b42651a387d96 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -966,6 +966,7 @@ struct i915_frontbuffer_tracking { }; struct i915_virtual_gpu { + struct mutex lock; /* serialises sending of g2v_notify command pkts */ bool active; u32 caps; }; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index e0e9b9b52544d..0db82921fb38b 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -834,10 +834,9 @@ static void mark_tlbs_dirty(struct i915_ppgtt *ppgtt) ppgtt->pd_dirty_engines = ALL_ENGINES; } -static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) +static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) { - struct i915_address_space *vm = &ppgtt->vm; - struct drm_i915_private *dev_priv = vm->i915; + struct drm_i915_private *dev_priv = ppgtt->vm.i915; enum vgt_g2v_type msg; int i; @@ -846,7 +845,9 @@ static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) else atomic_dec(px_used(ppgtt->pd)); - if (i915_vm_is_4lvl(vm)) { + mutex_lock(&dev_priv->vgpu.lock); + + if (i915_vm_is_4lvl(&ppgtt->vm)) { const u64 daddr = px_dma(ppgtt->pd); I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr)); @@ -866,9 +867,10 @@ static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY); } + /* g2v_notify atomically (via hv trap) consumes the message packet. */ I915_WRITE(vgtif_reg(g2v_notify), msg); - return 0; + mutex_unlock(&dev_priv->vgpu.lock); } /* Index shifts into the pagetable are offset by GEN8_PTE_SHIFT [12] */ diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c index 39bebf16edbee..968be26735c50 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.c +++ b/drivers/gpu/drm/i915/i915_vgpu.c @@ -94,6 +94,7 @@ void i915_detect_vgpu(struct drm_i915_private *dev_priv) dev_priv->vgpu.caps = readl(shared_area + vgtif_offset(vgt_caps)); dev_priv->vgpu.active = true; + mutex_init(&dev_priv->vgpu.lock); DRM_INFO("Virtual GPU for Intel GVT-g detected.\n"); out: From acd674af95d3f627062007429b9c195c6b32361d Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Fri, 23 Aug 2019 16:52:51 -0400 Subject: [PATCH 031/331] drm/i915: Call dma_set_max_seg_size() in i915_driver_hw_probe() Currently, we don't call dma_set_max_seg_size() for i915 because we intentionally do not limit the segment length that the device supports. However, this results in a warning being emitted if we try to map anything larger than SZ_64K on a kernel with CONFIG_DMA_API_DEBUG_SG enabled: [ 7.751926] DMA-API: i915 0000:00:02.0: mapping sg segment longer than device claims to support [len=98304] [max=65536] [ 7.751934] WARNING: CPU: 5 PID: 474 at kernel/dma/debug.c:1220 debug_dma_map_sg+0x20f/0x340 This was originally brought up on https://bugs.freedesktop.org/show_bug.cgi?id=108517 , and the consensus there was it wasn't really useful to set a limit (and that dma-debug isn't really all that useful for i915 in the first place). Unfortunately though, CONFIG_DMA_API_DEBUG_SG is enabled in the debug configs for various distro kernels. Since a WARN_ON() will disable automatic problem reporting (and cause any CI with said option enabled to start complaining), we really should just fix the problem. Note that as me and Chris Wilson discussed, the other solution for this would be to make DMA-API not make such assumptions when a driver hasn't explicitly set a maximum segment size. But, taking a look at the commit which originally introduced this behavior, commit 78c47830a5cb ("dma-debug: check scatterlist segments"), there is an explicit mention of this assumption and how it applies to devices with no segment size: Conversely, devices which are less limited than the rather conservative defaults, or indeed have no limitations at all (e.g. GPUs with their own internal MMU), should be encouraged to set appropriate dma_parms, as they may get more efficient DMA mapping performance out of it. So unless there's any concerns (I'm open to discussion!), let's just follow suite and call dma_set_max_seg_size() with UINT_MAX as our limit to silence any warnings. Changes since v3: * Drop patch for enabling CONFIG_DMA_API_DEBUG_SG in CI. It looks like just turning it on causes the kernel to spit out bogus WARN_ONs() during some igt tests which would otherwise require teaching igt to disable the various DMA-API debugging options causing this. This is too much work to be worth it, since DMA-API debugging is useless for us. So, we'll just settle with this single patch to squelch WARN_ONs() during driver load for users that have CONFIG_DMA_API_DEBUG_SG turned on for some reason. * Move dma_set_max_seg_size() call into i915_driver_hw_probe() - Chris Wilson Signed-off-by: Lyude Paul Reviewed-by: Chris Wilson Cc: # v4.18+ Link: https://patchwork.freedesktop.org/patch/msgid/20190823205251.14298-1-lyude@redhat.com --- drivers/gpu/drm/i915/i915_drv.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 723b9b7202b06..9478f704a315e 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1279,6 +1279,12 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) pci_set_master(pdev); + /* + * We don't have a max segment size, so set it to the max so sg's + * debugging layer doesn't complain + */ + dma_set_max_seg_size(&pdev->dev, UINT_MAX); + /* overlay on gen2 is broken and can't address above 1G */ if (IS_GEN(dev_priv, 2)) { ret = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(30)); From 1d5b7773314994d23f2fc4acfa4278929b03ffd5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 26 Aug 2019 08:21:22 +0100 Subject: [PATCH 032/331] drm/i915/selftests: Add the usual batch vma managements to st_workarounds To properly handle asynchronous migration of batch objects, we need to couple the fences on the incoming batch into the request and should not assume that they always start idle. Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20190826072149.9447-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/selftest_workarounds.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index d06d68ac2a3b2..999a98f00494f 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -565,6 +565,14 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, goto err_request; } + i915_vma_lock(batch); + err = i915_request_await_object(rq, batch->obj, false); + if (err == 0) + err = i915_vma_move_to_active(batch, rq, 0); + i915_vma_unlock(batch); + if (err) + goto err_request; + err = engine->emit_bb_start(rq, batch->node.start, PAGE_SIZE, 0); @@ -850,6 +858,14 @@ static int scrub_whitelisted_registers(struct i915_gem_context *ctx, goto err_request; } + i915_vma_lock(batch); + err = i915_request_await_object(rq, batch->obj, false); + if (err == 0) + err = i915_vma_move_to_active(batch, rq, 0); + i915_vma_unlock(batch); + if (err) + goto err_request; + /* Perform the writes from an unprivileged "user" batch */ err = engine->emit_bb_start(rq, batch->node.start, 0, 0); From ebfdf5cd806b3bbf1ff79e69bce6a28df8bbe39d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 26 Aug 2019 14:07:50 +0100 Subject: [PATCH 033/331] drm/i915: Use NOEVICT for first pass on attemping to pin a GGTT mmap The intention is that we first try to pin the current vma into the mappable aperture only if it is already in use or it fits in the free space and will not cause contention. The first attempt was meant to be using PIN_NOEVICT to reuse the current vma if possible, following up with different eviction strategies. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111485 Fixes: 6846895fde05 ("drm/i915: Replace PIN_NONFAULT with calls to PIN_NOEVICT") Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20190826130750.17272-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 595539a09e385..261c9bd83f518 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -265,7 +265,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE | PIN_NONBLOCK /* NOWARN */ | - PIN_NOSEARCH); + PIN_NOEVICT); if (IS_ERR(vma)) { /* Use a partial view if it is bigger than available space */ struct i915_ggtt_view view = From 21b0c32bdaba7c2e365d9faca0536fc40dd056d4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 27 Aug 2019 10:49:33 +0100 Subject: [PATCH 034/331] drm/i915/selftests: Markup impossible error pointers If we create a new live_context() we should have a mapping for each engine. Document that assumption with an assertion. Reported-by: Dan Carpenter Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20190827094933.13778-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 3adb60c2fd1f6..37a177e376659 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -397,6 +397,7 @@ static int igt_ctx_exec(void *arg) } ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); + GEM_BUG_ON(IS_ERR(ce)); if (!obj) { obj = create_test_object(ce->vm, file, &objects); @@ -521,6 +522,8 @@ static int igt_shared_ctx_exec(void *arg) __assign_ppgtt(ctx, parent->vm); ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); + GEM_BUG_ON(IS_ERR(ce)); + if (!obj) { obj = create_test_object(parent->vm, file, &objects); if (IS_ERR(obj)) { From f52c6d0df6909a0412c347c8e442acc22ce94747 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 27 Aug 2019 14:26:31 +0100 Subject: [PATCH 035/331] drm/i915: Only activate i915_active debugobject once The point of debug_object_activate is to mark the first, and only the first, acquisition. The object then remains active until the last release. However, we marked up all successful first acquires even though we allowed concurrent parties to try and acquire the i915_active simultaneously (serialised by the i915_active.mutex). Testcase: igt/gem_mmap_gtt/fault-concurrent Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20190827132631.18627-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_active.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 48e16ad93bbdd..6a447f1d01103 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -92,12 +92,16 @@ static void debug_active_init(struct i915_active *ref) static void debug_active_activate(struct i915_active *ref) { - debug_object_activate(ref, &active_debug_desc); + lockdep_assert_held(&ref->mutex); + if (!atomic_read(&ref->count)) /* before the first inc */ + debug_object_activate(ref, &active_debug_desc); } static void debug_active_deactivate(struct i915_active *ref) { - debug_object_deactivate(ref, &active_debug_desc); + lockdep_assert_held(&ref->mutex); + if (!atomic_read(&ref->count)) /* after the last dec */ + debug_object_deactivate(ref, &active_debug_desc); } static void debug_active_fini(struct i915_active *ref) From cccdce1dd061c0b8ae156f026a3ee2c9d58613d3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 27 Aug 2019 14:59:35 +0100 Subject: [PATCH 036/331] drm/i915: Make engine's batch pool safe for use with virtual engines A virtual engine itself does not have a batch pool, but we can gleefully use any of its siblings instead. Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20190827135935.3831-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 4 ++-- drivers/gpu/drm/i915/gem/i915_gem_object_blt.c | 4 ++-- drivers/gpu/drm/i915/gt/intel_engine_pool.c | 12 +++++++++++- drivers/gpu/drm/i915/gt/intel_engine_pool.h | 2 +- drivers/gpu/drm/i915/gt/intel_lrc.c | 12 ++++++++++++ drivers/gpu/drm/i915/gt/intel_lrc.h | 4 ++++ 6 files changed, 32 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index f813fcb8ceb6e..5a2238d444237 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1145,7 +1145,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, u32 *cmd; int err; - pool = intel_engine_pool_get(&eb->engine->pool, PAGE_SIZE); + pool = intel_engine_get_pool(eb->engine, PAGE_SIZE); if (IS_ERR(pool)) return PTR_ERR(pool); @@ -1963,7 +1963,7 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master) struct i915_vma *vma; int err; - pool = intel_engine_pool_get(&eb->engine->pool, eb->batch_len); + pool = intel_engine_get_pool(eb->engine, eb->batch_len); if (IS_ERR(pool)) return ERR_CAST(pool); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c index 6415f9a17e2d5..5bd8de124d74d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c @@ -32,7 +32,7 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, count = div_u64(vma->size, block_size); size = (1 + 8 * count) * sizeof(u32); size = round_up(size, PAGE_SIZE); - pool = intel_engine_pool_get(&ce->engine->pool, size); + pool = intel_engine_get_pool(ce->engine, size); if (IS_ERR(pool)) { err = PTR_ERR(pool); goto out_pm; @@ -216,7 +216,7 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, count = div_u64(dst->size, block_size); size = (1 + 11 * count) * sizeof(u32); size = round_up(size, PAGE_SIZE); - pool = intel_engine_pool_get(&ce->engine->pool, size); + pool = intel_engine_get_pool(ce->engine, size); if (IS_ERR(pool)) { err = PTR_ERR(pool); goto out_pm; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.c b/drivers/gpu/drm/i915/gt/intel_engine_pool.c index 4cd54c5699111..97d36cca8ded1 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pool.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.c @@ -107,9 +107,19 @@ node_create(struct intel_engine_pool *pool, size_t sz) return node; } +static struct intel_engine_pool *lookup_pool(struct intel_engine_cs *engine) +{ + if (intel_engine_is_virtual(engine)) + engine = intel_virtual_engine_get_sibling(engine, 0); + + GEM_BUG_ON(!engine); + return &engine->pool; +} + struct intel_engine_pool_node * -intel_engine_pool_get(struct intel_engine_pool *pool, size_t size) +intel_engine_get_pool(struct intel_engine_cs *engine, size_t size) { + struct intel_engine_pool *pool = lookup_pool(engine); struct intel_engine_pool_node *node; struct list_head *list; unsigned long flags; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.h b/drivers/gpu/drm/i915/gt/intel_engine_pool.h index 8d069efd94576..7e2123b335949 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pool.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.h @@ -12,7 +12,7 @@ #include "i915_request.h" struct intel_engine_pool_node * -intel_engine_pool_get(struct intel_engine_pool *pool, size_t size); +intel_engine_get_pool(struct intel_engine_cs *engine, size_t size); static inline int intel_engine_pool_mark_active(struct intel_engine_pool_node *node, diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index d42584439f515..cfbdcca76ff06 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -3851,6 +3851,18 @@ int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine, return 0; } +struct intel_engine_cs * +intel_virtual_engine_get_sibling(struct intel_engine_cs *engine, + unsigned int sibling) +{ + struct virtual_engine *ve = to_virtual_engine(engine); + + if (sibling >= ve->num_siblings) + return NULL; + + return ve->siblings[sibling]; +} + void intel_execlists_show_requests(struct intel_engine_cs *engine, struct drm_printer *m, void (*show_request)(struct drm_printer *m, diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h index c2bba82bcc16a..dc0252e0589e4 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -131,4 +131,8 @@ int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine, const struct intel_engine_cs *master, const struct intel_engine_cs *sibling); +struct intel_engine_cs * +intel_virtual_engine_get_sibling(struct intel_engine_cs *engine, + unsigned int sibling); + #endif /* _INTEL_LRC_H_ */ From 6056517ab8c30fb239b08c06ef577677cbcd409c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Fri, 23 Aug 2019 01:20:37 -0700 Subject: [PATCH 037/331] drm/i915/tgl: Guard and warn if more than one eDP panel is present MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On TGL+ it's possible to have PSR1 enabled in other ports besides DDIA. PSR2 is still limited to DDIA. However currently we handle only one instance of PSR struct. Lets guard intel_psr_init_dpcd() against multiple eDP panels and warn about it. v2: Reword commit message to be TGL+ only and with the info where PSR1/PSR2 are supported (Lucas) Cc: Dhinakaran Pandiyan Cc: Rodrigo Vivi Signed-off-by: José Roberto de Souza Signed-off-by: Lucas De Marchi Reviewed-by: Anshuman Gupta Link: https://patchwork.freedesktop.org/patch/msgid/20190823082055.5992-6-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/display/intel_psr.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 28b62e5872044..78e920015a000 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -283,6 +283,11 @@ void intel_psr_init_dpcd(struct intel_dp *intel_dp) struct drm_i915_private *dev_priv = to_i915(dp_to_dig_port(intel_dp)->base.base.dev); + if (dev_priv->psr.dp) { + DRM_WARN("More than one eDP panel found, PSR support should be extended\n"); + return; + } + drm_dp_dpcd_read(&intel_dp->aux, DP_PSR_SUPPORT, intel_dp->psr_dpcd, sizeof(intel_dp->psr_dpcd)); @@ -305,7 +310,6 @@ void intel_psr_init_dpcd(struct intel_dp *intel_dp) dev_priv->psr.sink_sync_latency = intel_dp_get_sink_sync_latency(intel_dp); - WARN_ON(dev_priv->psr.dp); dev_priv->psr.dp = intel_dp; if (INTEL_GEN(dev_priv) >= 9 && From 0f81e645eb1ed224af43bb7b347806cc74d189f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Sat, 17 Aug 2019 02:38:33 -0700 Subject: [PATCH 038/331] drm/i915: Do not read PSR2 register in transcoders without PSR2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fix unclaimed access warnings: [ 245.525788] ------------[ cut here ]------------ [ 245.525884] Unclaimed read from register 0x62900 [ 245.526154] WARNING: CPU: 0 PID: 1234 at drivers/gpu/drm/i915/intel_uncore.c:1100 __unclaimed_reg_debug+0x40/0x50 [i915] [ 245.526160] Modules linked in: i915 x86_pkg_temp_thermal ax88179_178a coretemp usbnet crct10dif_pclmul mii crc32_pclmul ghash_clmulni_intel e1000e [last unloaded: i915] [ 245.526191] CPU: 0 PID: 1234 Comm: kms_fullmodeset Not tainted 5.1.0-rc6+ #915 [ 245.526197] Hardware name: Intel Corporation Tiger Lake Client Platform/TigerLake U DDR4 SODIMM RVP, BIOS TGLSFWR1.D00.2081.A10.1904182155 04/18/2019 [ 245.526273] RIP: 0010:__unclaimed_reg_debug+0x40/0x50 [i915] [ 245.526281] Code: 74 05 5b 5d 41 5c c3 45 84 e4 48 c7 c0 76 97 21 a0 48 c7 c6 6c 97 21 a0 89 ea 48 0f 44 f0 48 c7 c7 7f 97 21 a0 e8 4f 1e fe e0 <0f> 0b 83 2d 6f d9 1c 00 01 5b 5d 41 5c c3 66 90 41 57 41 56 41 55 [ 245.526288] RSP: 0018:ffffc900006bf7d8 EFLAGS: 00010086 [ 245.526297] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 [ 245.526304] RDX: 0000000000000007 RSI: 0000000000000000 RDI: 00000000ffffffff [ 245.526310] RBP: 0000000000061900 R08: 0000000000000000 R09: 0000000000000001 [ 245.526317] R10: 0000000000000006 R11: 0000000000000000 R12: 0000000000000001 [ 245.526324] R13: 0000000000000000 R14: ffff8882914f0d58 R15: 0000000000000206 [ 245.526332] FS: 00007fed2a3c39c0(0000) GS:ffff8882a8600000(0000) knlGS:0000000000000000 [ 245.526340] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 245.526347] CR2: 00007fed28dff000 CR3: 00000002a086c006 CR4: 0000000000760ef0 [ 245.526354] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 245.526361] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 245.526367] PKRU: 55555554 [ 245.526373] Call Trace: [ 245.526454] gen11_fwtable_read32+0x219/0x250 [i915] [ 245.526576] intel_psr_activate+0x57/0x400 [i915] [ 245.526697] intel_psr_enable_locked+0x367/0x4b0 [i915] [ 245.526828] intel_psr_enable+0xa4/0xd0 [i915] [ 245.526946] intel_enable_ddi+0x127/0x2f0 [i915] [ 245.527075] intel_encoders_enable.isra.79+0x62/0x90 [i915] [ 245.527202] haswell_crtc_enable+0x2a2/0x850 [i915] [ 245.527337] intel_update_crtc+0x51/0x360 [i915] [ 245.527466] skl_update_crtcs+0x26c/0x300 [i915] [ 245.527603] intel_atomic_commit_tail+0x3e5/0x13c0 [i915] [ 245.527757] intel_atomic_commit+0x24d/0x2d0 [i915] [ 245.527782] drm_atomic_helper_set_config+0x7b/0x90 [ 245.527799] drm_mode_setcrtc+0x1b4/0x6f0 [ 245.527856] ? drm_mode_getcrtc+0x180/0x180 [ 245.527867] drm_ioctl_kernel+0xad/0xf0 [ 245.527886] drm_ioctl+0x2f4/0x3b0 [ 245.527902] ? drm_mode_getcrtc+0x180/0x180 [ 245.527935] ? rcu_read_lock_sched_held+0x6f/0x80 [ 245.527956] do_vfs_ioctl+0xa0/0x6d0 [ 245.527970] ? __task_pid_nr_ns+0xb6/0x200 [ 245.527991] ksys_ioctl+0x35/0x70 [ 245.528009] __x64_sys_ioctl+0x11/0x20 [ 245.528020] do_syscall_64+0x55/0x180 [ 245.528034] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 245.528042] RIP: 0033:0x7fed2cc7c3c7 [ 245.528050] Code: 00 00 90 48 8b 05 c9 3a 0d 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 99 3a 0d 00 f7 d8 64 89 01 48 [ 245.528057] RSP: 002b:00007ffe36944378 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 245.528067] RAX: ffffffffffffffda RBX: 00007ffe369443b0 RCX: 00007fed2cc7c3c7 [ 245.528074] RDX: 00007ffe369443b0 RSI: 00000000c06864a2 RDI: 0000000000000003 [ 245.528081] RBP: 00007ffe369443b0 R08: 0000000000000000 R09: 0000564c0173ae98 [ 245.528088] R10: 0000564c0173aeb8 R11: 0000000000000246 R12: 00000000c06864a2 [ 245.528095] R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000000000 [ 245.528128] irq event stamp: 140866 [ 245.528138] hardirqs last enabled at (140865): [] _raw_spin_unlock_irqrestore+0x4c/0x60 [ 245.528148] hardirqs last disabled at (140866): [] _raw_spin_lock_irqsave+0xd/0x50 [ 245.528158] softirqs last enabled at (140860): [] __do_softirq+0x38c/0x499 [ 245.528170] softirqs last disabled at (140853): [] irq_exit+0xa9/0xc0 [ 245.528247] WARNING: CPU: 0 PID: 1234 at drivers/gpu/drm/i915/intel_uncore.c:1100 __unclaimed_reg_debug+0x40/0x50 [i915] [ 245.528254] ---[ end trace 366069676e98a410 ]--- Signed-off-by: José Roberto de Souza Signed-off-by: Lucas De Marchi Reviewed-by: Imre Deak Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20190823082055.5992-7-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/display/intel_psr.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 78e920015a000..dafd3737cc5aa 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -541,7 +541,9 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp) static bool transcoder_has_psr2(struct drm_i915_private *dev_priv, enum transcoder trans) { - if (INTEL_GEN(dev_priv) >= 12) + if (INTEL_GEN(dev_priv) < 9) + return false; + else if (INTEL_GEN(dev_priv) >= 12) return trans == TRANSCODER_A; else return trans == TRANSCODER_EDP; @@ -667,8 +669,9 @@ static void intel_psr_activate(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); - if (INTEL_GEN(dev_priv) >= 9) + if (transcoder_has_psr2(dev_priv, dev_priv->psr.transcoder)) WARN_ON(I915_READ(EDP_PSR2_CTL(dev_priv->psr.transcoder)) & EDP_PSR2_ENABLE); + WARN_ON(I915_READ(EDP_PSR_CTL(dev_priv->psr.transcoder)) & EDP_PSR_ENABLE); WARN_ON(dev_priv->psr.active); lockdep_assert_held(&dev_priv->psr.lock); @@ -821,7 +824,7 @@ static void intel_psr_exit(struct drm_i915_private *dev_priv) u32 val; if (!dev_priv->psr.active) { - if (INTEL_GEN(dev_priv) >= 9) { + if (transcoder_has_psr2(dev_priv, dev_priv->psr.transcoder)) { val = I915_READ(EDP_PSR2_CTL(dev_priv->psr.transcoder)); WARN_ON(val & EDP_PSR2_ENABLE); } From f7b3c22619a44ce7599fc9fa1df5df6281a26217 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Fri, 23 Aug 2019 01:20:41 -0700 Subject: [PATCH 039/331] drm/i915/tgl: Add maximum resolution supported by PSR2 HW MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TGL PSR2 HW supports a bigger resolution, so lets add it BSpec: 50422, 49199 Cc: Dhinakaran Pandiyan Cc: Rodrigo Vivi Signed-off-by: José Roberto de Souza Signed-off-by: Lucas De Marchi Reviewed-by: Lucas De Marchi Reviewed-by: Anshuman Gupta Link: https://patchwork.freedesktop.org/patch/msgid/20190823082055.5992-10-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/display/intel_psr.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index dafd3737cc5aa..2af3826121fa9 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -576,7 +576,10 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, return false; } - if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 12) { + psr_max_h = 5120; + psr_max_v = 3200; + } else if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) { psr_max_h = 4096; psr_max_v = 2304; } else if (IS_GEN(dev_priv, 9)) { From 0456417ef680aab9a2aa93b950443e25104101b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Fri, 23 Aug 2019 01:20:43 -0700 Subject: [PATCH 040/331] drm: Add for_each_oldnew_intel_crtc_in_state_reverse() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Same as for_each_oldnew_intel_crtc_in_state() but iterates in reverse order. v2: Fix additional blank line v3: Rebase Cc: Rodrigo Vivi Cc: Ville Syrjälä Signed-off-by: José Roberto de Souza Signed-off-by: Lucas De Marchi Reviewed-by: Mika Kahola Link: https://patchwork.freedesktop.org/patch/msgid/20190823082055.5992-12-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/display/intel_display.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index e57e6969051d0..03321fb4a7038 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -411,6 +411,15 @@ enum phy_fia { (__i)++) \ for_each_if(crtc) +#define for_each_oldnew_intel_crtc_in_state_reverse(__state, crtc, old_crtc_state, new_crtc_state, __i) \ + for ((__i) = (__state)->base.dev->mode_config.num_crtc - 1; \ + (__i) >= 0 && \ + ((crtc) = to_intel_crtc((__state)->base.crtcs[__i].ptr), \ + (old_crtc_state) = to_intel_crtc_state((__state)->base.crtcs[__i].old_state), \ + (new_crtc_state) = to_intel_crtc_state((__state)->base.crtcs[__i].new_state), 1); \ + (__i)--) \ + for_each_if(crtc) + void intel_link_compute_m_n(u16 bpp, int nlanes, int pixel_clock, int link_clock, struct intel_link_m_n *m_n, From 9c722e17c1b91d7dc5ec77cb4fcaaa6b30653fe3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Fri, 23 Aug 2019 01:20:44 -0700 Subject: [PATCH 041/331] drm/i915: Disable pipes in reverse order MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Disable CRTC/pipes in reverse order because some features (MST in TGL+) requires master and slave relationship between pipes, so it should always pick the lowest pipe as master as it will be enabled first and disable in the reverse order so the master will be the last one to be disabled. Cc: Rodrigo Vivi Cc: Ville Syrjälä Signed-off-by: José Roberto de Souza Signed-off-by: Lucas De Marchi Reviewed-by: Mika Kahola Link: https://patchwork.freedesktop.org/patch/msgid/20190823082055.5992-13-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index ea2915dde6abb..a9f8be2cd3647 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -13908,7 +13908,15 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) if (state->modeset) wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_MODESET); - for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { + /* + * Disable CRTC/pipes in reverse order because some features(MST in + * TGL+) requires master and slave relationship between pipes, so it + * should always pick the lowest pipe as master as it will be enabled + * first and disable in the reverse order so the master will be the + * last one to be disabled. + */ + for_each_oldnew_intel_crtc_in_state_reverse(state, crtc, old_crtc_state, + new_crtc_state, i) { if (needs_modeset(new_crtc_state) || new_crtc_state->update_pipe) { From 99389390fef50c880d2ca0b6667c12423b3d5260 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Fri, 23 Aug 2019 01:20:47 -0700 Subject: [PATCH 042/331] drm/i915/tgl: Implement TGL DisplayPort training sequence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On TGL some registers moved from DDI to transcoder and the DisplayPort training sequence has a separate BSpec page. I started adding 'ifs' to the original intel_ddi_pre_enable_dp() but it was becoming really hard to follow, so a new and cleaner function for TGL was added with comments of all steps. It's similar to ICL, but different enough to deserve a new function. The rest of DisplayPort enable and the whole disable sequences remained the same. v2: FEC and DSC should be enabled on sink side before start link training(Maarten reported and Manasi confirmed the DSC part) v3: Add call to enable FEC on step 7.l(Manasi) BSpec: 49190 Cc: Maarten Lankhorst Cc: Manasi Navare Cc: Ville Syrjälä Signed-off-by: José Roberto de Souza Signed-off-by: Lucas De Marchi Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20190823082055.5992-16-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/display/intel_ddi.c | 140 ++++++++++++++++++++++- drivers/gpu/drm/i915/display/intel_dp.c | 8 +- 2 files changed, 140 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 8eb2b3ec01edd..3180dacb5be49 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -1761,7 +1761,14 @@ void intel_ddi_set_vc_payload_alloc(const struct intel_crtc_state *crtc_state, I915_WRITE(TRANS_DDI_FUNC_CTL(cpu_transcoder), temp); } -void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state) +/* + * Returns the TRANS_DDI_FUNC_CTL value based on CRTC state. + * + * Only intended to be used by intel_ddi_enable_transcoder_func() and + * intel_ddi_config_transcoder_func(). + */ +static u32 +intel_ddi_transcoder_func_reg_val_get(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct intel_encoder *encoder = intel_ddi_get_crtc_encoder(crtc); @@ -1845,6 +1852,34 @@ void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state) temp |= DDI_PORT_WIDTH(crtc_state->lane_count); } + return temp; +} + +void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; + u32 temp; + + temp = intel_ddi_transcoder_func_reg_val_get(crtc_state); + I915_WRITE(TRANS_DDI_FUNC_CTL(cpu_transcoder), temp); +} + +/* + * Same as intel_ddi_enable_transcoder_func(), but it does not set the enable + * bit. + */ +static void +intel_ddi_config_transcoder_func(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; + u32 temp; + + temp = intel_ddi_transcoder_func_reg_val_get(crtc_state); + temp &= ~TRANS_DDI_FUNC_ENABLE; I915_WRITE(TRANS_DDI_FUNC_CTL(cpu_transcoder), temp); } @@ -3160,9 +3195,94 @@ static void intel_ddi_disable_fec_state(struct intel_encoder *encoder, POSTING_READ(DP_TP_CTL(port)); } -static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - const struct drm_connector_state *conn_state) +static void tgl_ddi_pre_enable_dp(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) +{ + struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum phy phy = intel_port_to_phy(dev_priv, encoder->port); + struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + bool is_mst = intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST); + int level = intel_ddi_dp_level(intel_dp); + + intel_dp_set_link_params(intel_dp, crtc_state->port_clock, + crtc_state->lane_count, is_mst); + + /* 1.a got on intel_atomic_commit_tail() */ + + /* 2. */ + intel_edp_panel_on(intel_dp); + + /* + * 1.b, 3. and 4. is done before tgl_ddi_pre_enable_dp() by: + * haswell_crtc_enable()->intel_encoders_pre_pll_enable() and + * haswell_crtc_enable()->intel_enable_shared_dpll() + */ + + /* 5. */ + if (!intel_phy_is_tc(dev_priv, phy) || + dig_port->tc_mode != TC_PORT_TBT_ALT) + intel_display_power_get(dev_priv, + dig_port->ddi_io_power_domain); + + /* 6. */ + icl_program_mg_dp_mode(dig_port); + + /* + * 7.a - Steps in this function should only be executed over MST + * master, what will be taken in care by MST hook + * intel_mst_pre_enable_dp() + */ + intel_ddi_enable_pipe_clock(crtc_state); + + /* 7.b */ + intel_ddi_config_transcoder_func(crtc_state); + + /* 7.d */ + icl_disable_phy_clock_gating(dig_port); + + /* 7.e */ + icl_ddi_vswing_sequence(encoder, crtc_state->port_clock, level, + encoder->type); + + /* 7.f */ + if (intel_phy_is_combo(dev_priv, phy)) { + bool lane_reversal = + dig_port->saved_port_bits & DDI_BUF_PORT_REVERSAL; + + intel_combo_phy_power_up_lanes(dev_priv, phy, false, + crtc_state->lane_count, + lane_reversal); + } + + /* 7.g */ + intel_ddi_init_dp_buf_reg(encoder); + + if (!is_mst) + intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); + + intel_dp_sink_set_decompression_state(intel_dp, crtc_state, true); + /* + * DDI FEC: "anticipates enabling FEC encoding sets the FEC_READY bit + * in the FEC_CONFIGURATION register to 1 before initiating link + * training + */ + intel_dp_sink_set_fec_ready(intel_dp, crtc_state); + /* 7.c, 7.h, 7.i, 7.j */ + intel_dp_start_link_train(intel_dp); + + /* 7.k */ + intel_dp_stop_link_train(intel_dp); + + /* 7.l */ + intel_ddi_enable_fec(encoder, crtc_state); + intel_dsc_enable(encoder, crtc_state); +} + +static void hsw_ddi_pre_enable_dp(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); @@ -3228,6 +3348,18 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, intel_dsc_enable(encoder, crtc_state); } +static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + + if (INTEL_GEN(dev_priv) >= 12) + tgl_ddi_pre_enable_dp(encoder, crtc_state, conn_state); + else + hsw_ddi_pre_enable_dp(encoder, crtc_state, conn_state); +} + static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 23908da1cd5da..f74594d8d9df7 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -3954,13 +3954,13 @@ void intel_dp_set_idle_link_train(struct intel_dp *intel_dp) I915_WRITE(DP_TP_CTL(port), val); /* - * On PORT_A we can have only eDP in SST mode. There the only reason - * we need to set idle transmission mode is to work around a HW issue - * where we enable the pipe while not in idle link-training mode. + * Until TGL on PORT_A we can have only eDP in SST mode. There the only + * reason we need to set idle transmission mode is to work around a HW + * issue where we enable the pipe while not in idle link-training mode. * In this case there is requirement to wait for a minimum number of * idle patterns to be sent. */ - if (port == PORT_A) + if (port == PORT_A && INTEL_GEN(dev_priv) < 12) return; if (intel_de_wait_for_set(dev_priv, DP_TP_STATUS(port), From a8ff5d405e90e7044ab45f315f2dbcbe6aebf5f8 Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Fri, 23 Aug 2019 01:20:48 -0700 Subject: [PATCH 043/331] drm/i915/tgl: Do not apply WaIncreaseDefaultTLBEntries from GEN12 onwards Workaround no longer needed (plus L3_LRA_1_GPGPU doesn't exist). Cc: Daniele Ceraolo Spurio Cc: Michal Wajdeczko Signed-off-by: Michel Thierry Signed-off-by: Lucas De Marchi Reviewed-by: Stuart Summers Link: https://patchwork.freedesktop.org/patch/msgid/20190823082055.5992-17-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 0db82921fb38b..c94dfa5622474 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2029,7 +2029,7 @@ static void gtt_write_workarounds(struct intel_gt *gt) intel_uncore_write(uncore, GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); - else if (INTEL_GEN(i915) >= 9) + else if (INTEL_GEN(i915) >= 9 && INTEL_GEN(i915) <= 11) intel_uncore_write(uncore, GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); From 45e9c829ebeaa13b3b999f76963b9920d2147128 Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Fri, 23 Aug 2019 01:20:50 -0700 Subject: [PATCH 044/331] drm/i915/tgl/perf: use the same oa ctx_id format as icl Compared to Icelake, Tigerlake's MAX_CONTEXT_HW_ID is smaller by one, but since we just use the upper 32 bits of the lrc_desc, it's guaranteed OA will use the correct one. Cc: Lionel Landwerlin Signed-off-by: Michel Thierry Signed-off-by: Lucas De Marchi Reviewed-by: Umesh Nerlige Ramappa Link: https://patchwork.freedesktop.org/patch/msgid/20190823082055.5992-19-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/i915_perf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index e42b86827d6b6..2c9f46e126223 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1299,7 +1299,8 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) } break; - case 11: { + case 11: + case 12: { stream->specific_ctx_id_mask = ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32) | ((1U << GEN11_ENGINE_INSTANCE_WIDTH) - 1) << (GEN11_ENGINE_INSTANCE_SHIFT - 32) | From 8a9a982767b7c89b2f23290e4f0f21f194b79dfe Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Tue, 27 Aug 2019 11:58:05 -0700 Subject: [PATCH 045/331] drm/i915: use a separate context for gpu relocs The CS pre-parser can pre-fetch commands across memory sync points and starting from gen12 it is able to pre-fetch across BB_START and BB_END boundaries as well, so when we emit gpu relocs the pre-parser might fetch the target location of the reloc before the memory write lands. The parser can't pre-fetch across the ctx switch, so we use a separate context to guarantee that the memory is synchronized before the parser can get to it. Note that there is no risk of the CS doing a lite restore from the reloc context to the user context, even if the two have the same hw_id, because since gen11 the CS also checks the LRCA when deciding if it can lite-restore. v2: limit new context to gen12+, release in eb_destroy, add a comment in emit_fini_breadcrumb (Chris). Suggested-by: Chris Wilson Signed-off-by: Daniele Ceraolo Spurio Cc: Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190827185805.21799-1-daniele.ceraolospurio@intel.com --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 30 ++++++++++++++++++- drivers/gpu/drm/i915/gt/intel_lrc.c | 18 +++++++++++ 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 5a2238d444237..8fbb454cfd6bd 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -252,6 +252,7 @@ struct i915_execbuffer { bool has_fence : 1; bool needs_unfenced : 1; + struct intel_context *ce; struct i915_request *rq; u32 *rq_cmd; unsigned int rq_size; @@ -880,6 +881,9 @@ static void eb_destroy(const struct i915_execbuffer *eb) { GEM_BUG_ON(eb->reloc_cache.rq); + if (eb->reloc_cache.ce) + intel_context_put(eb->reloc_cache.ce); + if (eb->lut_size > 0) kfree(eb->buckets); } @@ -903,6 +907,7 @@ static void reloc_cache_init(struct reloc_cache *cache, cache->has_fence = cache->gen < 4; cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment; cache->node.allocated = false; + cache->ce = NULL; cache->rq = NULL; cache->rq_size = 0; } @@ -1168,7 +1173,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, if (err) goto err_unmap; - rq = i915_request_create(eb->context); + rq = intel_context_create_request(cache->ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_unpin; @@ -1239,6 +1244,29 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb, if (!intel_engine_can_store_dword(eb->engine)) return ERR_PTR(-ENODEV); + if (!cache->ce) { + struct intel_context *ce; + + /* + * The CS pre-parser can pre-fetch commands across + * memory sync points and starting gen12 it is able to + * pre-fetch across BB_START and BB_END boundaries + * (within the same context). We therefore use a + * separate context gen12+ to guarantee that the reloc + * writes land before the parser gets to the target + * memory location. + */ + if (cache->gen >= 12) + ce = intel_context_create(eb->context->gem_context, + eb->engine); + else + ce = intel_context_get(eb->context); + if (IS_ERR(ce)) + return ERR_CAST(ce); + + cache->ce = ce; + } + err = __reloc_gpu_alloc(eb, vma, len); if (unlikely(err)) return ERR_PTR(err); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index cfbdcca76ff06..a141e9e37bf7b 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -2931,6 +2931,24 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) return gen8_emit_fini_breadcrumb_footer(request, cs); } +/* + * Note that the CS instruction pre-parser will not stall on the breadcrumb + * flush and will continue pre-fetching the instructions after it before the + * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at + * BB_START/END instructions, so, even though we might pre-fetch the pre-amble + * of the next request before the memory has been flushed, we're guaranteed that + * we won't access the batch itself too early. + * However, on gen12+ the parser can pre-fetch across the BB_START/END commands, + * so, if the current request is modifying an instruction in the next request on + * the same intel_context, we might pre-fetch and then execute the pre-update + * instruction. To avoid this, the users of self-modifying code should either + * disable the parser around the code emitting the memory writes, via a new flag + * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For + * the in-kernel use-cases we've opted to use a separate context, see + * reloc_gpu() as an example. + * All the above applies only to the instructions themselves. Non-inline data + * used by the instructions is not pre-fetched. + */ static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) { From f2085c8e950d536c7982182b0c9c015804011dd2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 27 Aug 2019 17:17:25 +0100 Subject: [PATCH 046/331] drm/i915/selftests: Remove accidental serialization between gpu_fill Upon object creation for live_gem_contexts, we fill the object with known scratch and flush it out of the CPU cache. Before performing the GPU fill, we don't need to flush it again and so avoid serialising with previous fills. However, we do need some throttling on the internal interfaces if we do not want to run out of memory! Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20190827161726.3640-1-chris@chris-wilson.co.uk --- .../drm/i915/gem/selftests/i915_gem_context.c | 83 ++++++++++++++++--- 1 file changed, 72 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 37a177e376659..63116c4fa8ba9 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -180,12 +180,6 @@ static int gpu_fill(struct intel_context *ce, if (IS_ERR(vma)) return PTR_ERR(vma); - i915_gem_object_lock(obj); - err = i915_gem_object_set_to_gtt_domain(obj, true); - i915_gem_object_unlock(obj); - if (err) - return err; - err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER); if (err) return err; @@ -343,6 +337,45 @@ static unsigned long max_dwords(struct drm_i915_gem_object *obj) return npages / DW_PER_PAGE; } +static void throttle_release(struct i915_request **q, int count) +{ + int i; + + for (i = 0; i < count; i++) { + if (IS_ERR_OR_NULL(q[i])) + continue; + + i915_request_put(fetch_and_zero(&q[i])); + } +} + +static int throttle(struct intel_context *ce, + struct i915_request **q, int count) +{ + int i; + + if (!IS_ERR_OR_NULL(q[0])) { + if (i915_request_wait(q[0], + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT) < 0) + return -EINTR; + + i915_request_put(q[0]); + } + + for (i = 0; i < count - 1; i++) + q[i] = q[i + 1]; + + q[i] = intel_context_create_request(ce); + if (IS_ERR(q[i])) + return PTR_ERR(q[i]); + + i915_request_get(q[i]); + i915_request_add(q[i]); + + return 0; +} + static int igt_ctx_exec(void *arg) { struct drm_i915_private *i915 = arg; @@ -362,6 +395,7 @@ static int igt_ctx_exec(void *arg) for_each_engine(engine, i915, id) { struct drm_i915_gem_object *obj = NULL; unsigned long ncontexts, ndwords, dw; + struct i915_request *tq[5] = {}; struct igt_live_test t; struct drm_file *file; IGT_TIMEOUT(end_time); @@ -409,13 +443,18 @@ static int igt_ctx_exec(void *arg) } err = gpu_fill(ce, obj, dw); - intel_context_put(ce); - if (err) { pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), engine->name, ctx->hw_id, yesno(!!ctx->vm), err); + intel_context_put(ce); + goto out_unlock; + } + + err = throttle(ce, tq, ARRAY_SIZE(tq)); + if (err) { + intel_context_put(ce); goto out_unlock; } @@ -426,6 +465,8 @@ static int igt_ctx_exec(void *arg) ndwords++; ncontexts++; + + intel_context_put(ce); } pr_info("Submitted %lu contexts to %s, filling %lu dwords\n", @@ -444,6 +485,7 @@ static int igt_ctx_exec(void *arg) } out_unlock: + throttle_release(tq, ARRAY_SIZE(tq)); if (igt_live_test_end(&t)) err = -EIO; mutex_unlock(&i915->drm.struct_mutex); @@ -461,6 +503,7 @@ static int igt_ctx_exec(void *arg) static int igt_shared_ctx_exec(void *arg) { struct drm_i915_private *i915 = arg; + struct i915_request *tq[5] = {}; struct i915_gem_context *parent; struct intel_engine_cs *engine; enum intel_engine_id id; @@ -535,14 +578,20 @@ static int igt_shared_ctx_exec(void *arg) } err = gpu_fill(ce, obj, dw); - intel_context_put(ce); - kernel_context_close(ctx); - if (err) { pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), engine->name, ctx->hw_id, yesno(!!ctx->vm), err); + intel_context_put(ce); + kernel_context_close(ctx); + goto out_test; + } + + err = throttle(ce, tq, ARRAY_SIZE(tq)); + if (err) { + intel_context_put(ce); + kernel_context_close(ctx); goto out_test; } @@ -553,6 +602,9 @@ static int igt_shared_ctx_exec(void *arg) ndwords++; ncontexts++; + + intel_context_put(ce); + kernel_context_close(ctx); } pr_info("Submitted %lu contexts to %s, filling %lu dwords\n", ncontexts, engine->name, ndwords); @@ -574,6 +626,7 @@ static int igt_shared_ctx_exec(void *arg) mutex_lock(&i915->drm.struct_mutex); } out_test: + throttle_release(tq, ARRAY_SIZE(tq)); if (igt_live_test_end(&t)) err = -EIO; out_unlock: @@ -1050,6 +1103,7 @@ static int igt_ctx_readonly(void *arg) { struct drm_i915_private *i915 = arg; struct drm_i915_gem_object *obj = NULL; + struct i915_request *tq[5] = {}; struct i915_address_space *vm; struct i915_gem_context *ctx; unsigned long idx, ndwords, dw; @@ -1121,6 +1175,12 @@ static int igt_ctx_readonly(void *arg) goto out_unlock; } + err = throttle(ce, tq, ARRAY_SIZE(tq)); + if (err) { + i915_gem_context_unlock_engines(ctx); + goto out_unlock; + } + if (++dw == max_dwords(obj)) { obj = NULL; dw = 0; @@ -1151,6 +1211,7 @@ static int igt_ctx_readonly(void *arg) } out_unlock: + throttle_release(tq, ARRAY_SIZE(tq)); if (igt_live_test_end(&t)) err = -EIO; mutex_unlock(&i915->drm.struct_mutex); From c4e6488120e9ef1ceced8fb0caf134c5242ddaf4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 27 Aug 2019 17:17:26 +0100 Subject: [PATCH 047/331] drm/i915/selftests: Try to recycle context allocations igt_ctx_exec allocates a new context for each iteration, keeping them all allocated until the end. Instead, release the local ctx reference at the end of each iteration, allowing ourselves to reap those if under mempressure. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20190827161726.3640-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 63116c4fa8ba9..da54a718c7129 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -424,7 +424,7 @@ static int igt_ctx_exec(void *arg) struct i915_gem_context *ctx; struct intel_context *ce; - ctx = live_context(i915, file); + ctx = kernel_context(i915); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out_unlock; @@ -438,6 +438,7 @@ static int igt_ctx_exec(void *arg) if (IS_ERR(obj)) { err = PTR_ERR(obj); intel_context_put(ce); + kernel_context_close(ctx); goto out_unlock; } } @@ -449,12 +450,14 @@ static int igt_ctx_exec(void *arg) engine->name, ctx->hw_id, yesno(!!ctx->vm), err); intel_context_put(ce); + kernel_context_close(ctx); goto out_unlock; } err = throttle(ce, tq, ARRAY_SIZE(tq)); if (err) { intel_context_put(ce); + kernel_context_close(ctx); goto out_unlock; } @@ -467,6 +470,7 @@ static int igt_ctx_exec(void *arg) ncontexts++; intel_context_put(ce); + kernel_context_close(ctx); } pr_info("Submitted %lu contexts to %s, filling %lu dwords\n", From e8f6b4952ec54a9d7e43f908d39dc168b0310599 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 27 Aug 2019 13:06:15 +0100 Subject: [PATCH 048/331] drm/i915/execlists: Flush the post-sync breadcrumb write harder Quite rarely we see that the CS completion event fires before the breadcrumb is coherent, which presumably is a result of the CS_STALL not waiting for the post-sync operation. Try throwing in a DC_FLUSH into the following pipecontrol to see if that makes any difference. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Acked-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190827120615.31390-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index a141e9e37bf7b..171d5205962c0 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -2923,8 +2923,10 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) PIPE_CONTROL_DC_FLUSH_ENABLE); /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */ + /* XXX DC_FLUSH for post-sync write? (cf early context-switch bug) */ cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_FLUSH_ENABLE | + PIPE_CONTROL_DC_FLUSH_ENABLE | PIPE_CONTROL_CS_STALL, 0); From 592a7c5e082e237c7508f37b2c3494a731acad88 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Fri, 9 Aug 2019 17:56:53 +0300 Subject: [PATCH 049/331] drm/i915: Extend non readable mcr range Our current avoidance of non readable mcr range was not inclusive enough. Extend the start and end. References: HSDES#1405586840 Cc: Tvrtko Ursulin Signed-off-by: Mika Kuoppala Acked-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190809145653.2279-1-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index ad2261e0cba80..8639fcccdb420 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1453,7 +1453,7 @@ static bool mcr_range(struct drm_i915_private *i915, u32 offset) * which only controls CPU initiated MMIO. Routing does not * work for CS access so we cannot verify them on this path. */ - if (INTEL_GEN(i915) >= 8 && (offset >= 0xb100 && offset <= 0xb3ff)) + if (INTEL_GEN(i915) >= 8 && (offset >= 0xb000 && offset <= 0xb4ff)) return true; return false; From 325b916a9e1b11d28c40b02e24734750946a1785 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 26 Aug 2019 14:38:37 +0100 Subject: [PATCH 050/331] drm/i915/selftests: Ignore coherency failures on Broadwater We've been ignoring similar coherency issues in IGT for Broadwater, and specifically Broadwater (original gen4) and not, for example, Crestline (same generation as Broadwater, but the mobile variant). Without any means to reproduce locally (I have a 965GM but alas no 965G), fixing will be slow, so tell CI to ignore any failure until we are ready with a fix. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Acked-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20190826133837.6784-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 17006d50b63f0..a8014c59b3889 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1119,6 +1119,8 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine) case 3: /* maybe only uses physical not virtual addresses */ return !(IS_I915G(engine->i915) || IS_I915GM(engine->i915)); + case 4: + return !IS_I965G(engine->i915); /* who knows! */ case 6: return engine->class != VIDEO_DECODE_CLASS; /* b0rked */ default: From 8a84bacba19c154bdcb278657d0b7594ce2d939c Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 23 Aug 2019 13:07:11 +0300 Subject: [PATCH 051/331] drm/i915: Align power domain names with port names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a difference in BSpec's and the driver's designation of DDI ports. BSpec uses the following names: - before GEN11: BSpec/driver: port A/B/C/D etc - GEN11: BSpec/driver: port A-F - GEN12: BSpec: port A/B/C for combo PHY ports port TC1-6 for Type C PHY ports driver: port A-I. The driver's port D name matches BSpec's TC1 port name. So far power domains were named according to the BSpec designation, to make it easier to match the code against the specification. That however can be confusing when a power domain needs to be matched to a port on GEN12+. To resolve that use the driver's port A-I designation for power domain names too and rename the corresponding power wells so that they reflect the mapping from the driver's to BSpec's port name. Cc: Lucas De Marchi Cc: Ville Syrjälä Signed-off-by: Imre Deak Reviewed-by: Stanislav Lisovskiy Link: https://patchwork.freedesktop.org/patch/msgid/20190823100711.27833-1-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 10 +- .../drm/i915/display/intel_display_power.c | 361 +++++++++--------- .../drm/i915/display/intel_display_power.h | 40 +- drivers/gpu/drm/i915/i915_debugfs.c | 3 +- 4 files changed, 198 insertions(+), 216 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index a9f8be2cd3647..4773832b0f49b 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -6737,16 +6737,16 @@ intel_aux_power_domain(struct intel_digital_port *dig_port) dig_port->tc_mode == TC_PORT_TBT_ALT) { switch (dig_port->aux_ch) { case AUX_CH_C: - return POWER_DOMAIN_AUX_TBT1; + return POWER_DOMAIN_AUX_C_TBT; case AUX_CH_D: - return POWER_DOMAIN_AUX_TBT2; + return POWER_DOMAIN_AUX_D_TBT; case AUX_CH_E: - return POWER_DOMAIN_AUX_TBT3; + return POWER_DOMAIN_AUX_E_TBT; case AUX_CH_F: - return POWER_DOMAIN_AUX_TBT4; + return POWER_DOMAIN_AUX_F_TBT; default: MISSING_CASE(dig_port->aux_ch); - return POWER_DOMAIN_AUX_TBT1; + return POWER_DOMAIN_AUX_C_TBT; } } diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 12099760d99e9..ce88a27229ef5 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -24,11 +24,8 @@ bool intel_display_power_well_is_enabled(struct drm_i915_private *dev_priv, enum i915_power_well_id power_well_id); const char * -intel_display_power_domain_str(struct drm_i915_private *i915, - enum intel_display_power_domain domain) +intel_display_power_domain_str(enum intel_display_power_domain domain) { - bool ddi_tc_ports = IS_GEN(i915, 12); - switch (domain) { case POWER_DOMAIN_DISPLAY_CORE: return "DISPLAY_CORE"; @@ -71,23 +68,17 @@ intel_display_power_domain_str(struct drm_i915_private *i915, case POWER_DOMAIN_PORT_DDI_C_LANES: return "PORT_DDI_C_LANES"; case POWER_DOMAIN_PORT_DDI_D_LANES: - BUILD_BUG_ON(POWER_DOMAIN_PORT_DDI_D_LANES != - POWER_DOMAIN_PORT_DDI_TC1_LANES); - return ddi_tc_ports ? "PORT_DDI_TC1_LANES" : "PORT_DDI_D_LANES"; + return "PORT_DDI_D_LANES"; case POWER_DOMAIN_PORT_DDI_E_LANES: - BUILD_BUG_ON(POWER_DOMAIN_PORT_DDI_E_LANES != - POWER_DOMAIN_PORT_DDI_TC2_LANES); - return ddi_tc_ports ? "PORT_DDI_TC2_LANES" : "PORT_DDI_E_LANES"; + return "PORT_DDI_E_LANES"; case POWER_DOMAIN_PORT_DDI_F_LANES: - BUILD_BUG_ON(POWER_DOMAIN_PORT_DDI_F_LANES != - POWER_DOMAIN_PORT_DDI_TC3_LANES); - return ddi_tc_ports ? "PORT_DDI_TC3_LANES" : "PORT_DDI_F_LANES"; - case POWER_DOMAIN_PORT_DDI_TC4_LANES: - return "PORT_DDI_TC4_LANES"; - case POWER_DOMAIN_PORT_DDI_TC5_LANES: - return "PORT_DDI_TC5_LANES"; - case POWER_DOMAIN_PORT_DDI_TC6_LANES: - return "PORT_DDI_TC6_LANES"; + return "PORT_DDI_F_LANES"; + case POWER_DOMAIN_PORT_DDI_G_LANES: + return "PORT_DDI_G_LANES"; + case POWER_DOMAIN_PORT_DDI_H_LANES: + return "PORT_DDI_H_LANES"; + case POWER_DOMAIN_PORT_DDI_I_LANES: + return "PORT_DDI_I_LANES"; case POWER_DOMAIN_PORT_DDI_A_IO: return "PORT_DDI_A_IO"; case POWER_DOMAIN_PORT_DDI_B_IO: @@ -95,23 +86,17 @@ intel_display_power_domain_str(struct drm_i915_private *i915, case POWER_DOMAIN_PORT_DDI_C_IO: return "PORT_DDI_C_IO"; case POWER_DOMAIN_PORT_DDI_D_IO: - BUILD_BUG_ON(POWER_DOMAIN_PORT_DDI_D_IO != - POWER_DOMAIN_PORT_DDI_TC1_IO); - return ddi_tc_ports ? "PORT_DDI_TC1_IO" : "PORT_DDI_D_IO"; + return "PORT_DDI_D_IO"; case POWER_DOMAIN_PORT_DDI_E_IO: - BUILD_BUG_ON(POWER_DOMAIN_PORT_DDI_E_IO != - POWER_DOMAIN_PORT_DDI_TC2_IO); - return ddi_tc_ports ? "PORT_DDI_TC2_IO" : "PORT_DDI_E_IO"; + return "PORT_DDI_E_IO"; case POWER_DOMAIN_PORT_DDI_F_IO: - BUILD_BUG_ON(POWER_DOMAIN_PORT_DDI_F_IO != - POWER_DOMAIN_PORT_DDI_TC3_IO); - return ddi_tc_ports ? "PORT_DDI_TC3_IO" : "PORT_DDI_F_IO"; - case POWER_DOMAIN_PORT_DDI_TC4_IO: - return "PORT_DDI_TC4_IO"; - case POWER_DOMAIN_PORT_DDI_TC5_IO: - return "PORT_DDI_TC5_IO"; - case POWER_DOMAIN_PORT_DDI_TC6_IO: - return "PORT_DDI_TC6_IO"; + return "PORT_DDI_F_IO"; + case POWER_DOMAIN_PORT_DDI_G_IO: + return "PORT_DDI_G_IO"; + case POWER_DOMAIN_PORT_DDI_H_IO: + return "PORT_DDI_H_IO"; + case POWER_DOMAIN_PORT_DDI_I_IO: + return "PORT_DDI_I_IO"; case POWER_DOMAIN_PORT_DSI: return "PORT_DSI"; case POWER_DOMAIN_PORT_CRT: @@ -129,34 +114,33 @@ intel_display_power_domain_str(struct drm_i915_private *i915, case POWER_DOMAIN_AUX_C: return "AUX_C"; case POWER_DOMAIN_AUX_D: - BUILD_BUG_ON(POWER_DOMAIN_AUX_D != POWER_DOMAIN_AUX_TC1); - return ddi_tc_ports ? "AUX_TC1" : "AUX_D"; + return "AUX_D"; case POWER_DOMAIN_AUX_E: - BUILD_BUG_ON(POWER_DOMAIN_AUX_E != POWER_DOMAIN_AUX_TC2); - return ddi_tc_ports ? "AUX_TC2" : "AUX_E"; + return "AUX_E"; case POWER_DOMAIN_AUX_F: - BUILD_BUG_ON(POWER_DOMAIN_AUX_F != POWER_DOMAIN_AUX_TC3); - return ddi_tc_ports ? "AUX_TC3" : "AUX_F"; - case POWER_DOMAIN_AUX_TC4: - return "AUX_TC4"; - case POWER_DOMAIN_AUX_TC5: - return "AUX_TC5"; - case POWER_DOMAIN_AUX_TC6: - return "AUX_TC6"; + return "AUX_F"; + case POWER_DOMAIN_AUX_G: + return "AUX_G"; + case POWER_DOMAIN_AUX_H: + return "AUX_H"; + case POWER_DOMAIN_AUX_I: + return "AUX_I"; case POWER_DOMAIN_AUX_IO_A: return "AUX_IO_A"; - case POWER_DOMAIN_AUX_TBT1: - return "AUX_TBT1"; - case POWER_DOMAIN_AUX_TBT2: - return "AUX_TBT2"; - case POWER_DOMAIN_AUX_TBT3: - return "AUX_TBT3"; - case POWER_DOMAIN_AUX_TBT4: - return "AUX_TBT4"; - case POWER_DOMAIN_AUX_TBT5: - return "AUX_TBT5"; - case POWER_DOMAIN_AUX_TBT6: - return "AUX_TBT6"; + case POWER_DOMAIN_AUX_C_TBT: + return "AUX_C_TBT"; + case POWER_DOMAIN_AUX_D_TBT: + return "AUX_D_TBT"; + case POWER_DOMAIN_AUX_E_TBT: + return "AUX_E_TBT"; + case POWER_DOMAIN_AUX_F_TBT: + return "AUX_F_TBT"; + case POWER_DOMAIN_AUX_G_TBT: + return "AUX_G_TBT"; + case POWER_DOMAIN_AUX_H_TBT: + return "AUX_H_TBT"; + case POWER_DOMAIN_AUX_I_TBT: + return "AUX_I_TBT"; case POWER_DOMAIN_GMBUS: return "GMBUS"; case POWER_DOMAIN_INIT: @@ -1718,15 +1702,12 @@ __async_put_domains_state_ok(struct i915_power_domains *power_domains) static void print_power_domains(struct i915_power_domains *power_domains, const char *prefix, u64 mask) { - struct drm_i915_private *i915 = - container_of(power_domains, struct drm_i915_private, - power_domains); enum intel_display_power_domain domain; DRM_DEBUG_DRIVER("%s (%lu):\n", prefix, hweight64(mask)); for_each_power_domain(domain, mask) DRM_DEBUG_DRIVER("%s use_count %d\n", - intel_display_power_domain_str(i915, domain), + intel_display_power_domain_str(domain), power_domains->domain_use_count[domain]); } @@ -1896,7 +1877,7 @@ __intel_display_power_put_domain(struct drm_i915_private *dev_priv, { struct i915_power_domains *power_domains; struct i915_power_well *power_well; - const char *name = intel_display_power_domain_str(dev_priv, domain); + const char *name = intel_display_power_domain_str(domain); power_domains = &dev_priv->power_domains; @@ -2487,10 +2468,10 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_AUX_D) | \ BIT_ULL(POWER_DOMAIN_AUX_E) | \ BIT_ULL(POWER_DOMAIN_AUX_F) | \ - BIT_ULL(POWER_DOMAIN_AUX_TBT1) | \ - BIT_ULL(POWER_DOMAIN_AUX_TBT2) | \ - BIT_ULL(POWER_DOMAIN_AUX_TBT3) | \ - BIT_ULL(POWER_DOMAIN_AUX_TBT4) | \ + BIT_ULL(POWER_DOMAIN_AUX_C_TBT) | \ + BIT_ULL(POWER_DOMAIN_AUX_D_TBT) | \ + BIT_ULL(POWER_DOMAIN_AUX_E_TBT) | \ + BIT_ULL(POWER_DOMAIN_AUX_F_TBT) | \ BIT_ULL(POWER_DOMAIN_VGA) | \ BIT_ULL(POWER_DOMAIN_AUDIO) | \ BIT_ULL(POWER_DOMAIN_INIT)) @@ -2530,22 +2511,22 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_AUX_A)) #define ICL_AUX_B_IO_POWER_DOMAINS ( \ BIT_ULL(POWER_DOMAIN_AUX_B)) -#define ICL_AUX_C_IO_POWER_DOMAINS ( \ +#define ICL_AUX_C_TC1_IO_POWER_DOMAINS ( \ BIT_ULL(POWER_DOMAIN_AUX_C)) -#define ICL_AUX_D_IO_POWER_DOMAINS ( \ +#define ICL_AUX_D_TC2_IO_POWER_DOMAINS ( \ BIT_ULL(POWER_DOMAIN_AUX_D)) -#define ICL_AUX_E_IO_POWER_DOMAINS ( \ +#define ICL_AUX_E_TC3_IO_POWER_DOMAINS ( \ BIT_ULL(POWER_DOMAIN_AUX_E)) -#define ICL_AUX_F_IO_POWER_DOMAINS ( \ +#define ICL_AUX_F_TC4_IO_POWER_DOMAINS ( \ BIT_ULL(POWER_DOMAIN_AUX_F)) -#define ICL_AUX_TBT1_IO_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_AUX_TBT1)) -#define ICL_AUX_TBT2_IO_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_AUX_TBT2)) -#define ICL_AUX_TBT3_IO_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_AUX_TBT3)) -#define ICL_AUX_TBT4_IO_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_AUX_TBT4)) +#define ICL_AUX_C_TBT1_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_C_TBT)) +#define ICL_AUX_D_TBT2_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_D_TBT)) +#define ICL_AUX_E_TBT3_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_E_TBT)) +#define ICL_AUX_F_TBT4_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_F_TBT)) #define TGL_PW_5_POWER_DOMAINS ( \ BIT_ULL(POWER_DOMAIN_PIPE_D) | \ @@ -2565,24 +2546,24 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_PIPE_B) | \ BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_TC1_LANES) | \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_TC2_LANES) | \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_TC3_LANES) | \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_TC4_LANES) | \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_TC5_LANES) | \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_TC6_LANES) | \ - BIT_ULL(POWER_DOMAIN_AUX_TC1) | \ - BIT_ULL(POWER_DOMAIN_AUX_TC2) | \ - BIT_ULL(POWER_DOMAIN_AUX_TC3) | \ - BIT_ULL(POWER_DOMAIN_AUX_TC4) | \ - BIT_ULL(POWER_DOMAIN_AUX_TC5) | \ - BIT_ULL(POWER_DOMAIN_AUX_TC6) | \ - BIT_ULL(POWER_DOMAIN_AUX_TBT1) | \ - BIT_ULL(POWER_DOMAIN_AUX_TBT2) | \ - BIT_ULL(POWER_DOMAIN_AUX_TBT3) | \ - BIT_ULL(POWER_DOMAIN_AUX_TBT4) | \ - BIT_ULL(POWER_DOMAIN_AUX_TBT5) | \ - BIT_ULL(POWER_DOMAIN_AUX_TBT6) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_E_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_F_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_G_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_H_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_I_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_D) | \ + BIT_ULL(POWER_DOMAIN_AUX_E) | \ + BIT_ULL(POWER_DOMAIN_AUX_F) | \ + BIT_ULL(POWER_DOMAIN_AUX_G) | \ + BIT_ULL(POWER_DOMAIN_AUX_H) | \ + BIT_ULL(POWER_DOMAIN_AUX_I) | \ + BIT_ULL(POWER_DOMAIN_AUX_D_TBT) | \ + BIT_ULL(POWER_DOMAIN_AUX_E_TBT) | \ + BIT_ULL(POWER_DOMAIN_AUX_F_TBT) | \ + BIT_ULL(POWER_DOMAIN_AUX_G_TBT) | \ + BIT_ULL(POWER_DOMAIN_AUX_H_TBT) | \ + BIT_ULL(POWER_DOMAIN_AUX_I_TBT) | \ BIT_ULL(POWER_DOMAIN_VGA) | \ BIT_ULL(POWER_DOMAIN_AUDIO) | \ BIT_ULL(POWER_DOMAIN_INIT)) @@ -2598,35 +2579,50 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_AUX_A) | \ BIT_ULL(POWER_DOMAIN_INIT)) -#define TGL_DDI_IO_TC1_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_TC1_IO)) -#define TGL_DDI_IO_TC2_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_TC2_IO)) -#define TGL_DDI_IO_TC3_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_TC3_IO)) -#define TGL_DDI_IO_TC4_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_TC4_IO)) -#define TGL_DDI_IO_TC5_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_TC5_IO)) -#define TGL_DDI_IO_TC6_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_TC6_IO)) - -#define TGL_AUX_TC1_IO_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_AUX_TC1)) -#define TGL_AUX_TC2_IO_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_AUX_TC2)) -#define TGL_AUX_TC3_IO_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_AUX_TC3)) -#define TGL_AUX_TC4_IO_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_AUX_TC4)) -#define TGL_AUX_TC5_IO_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_AUX_TC5)) -#define TGL_AUX_TC6_IO_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_AUX_TC6)) -#define TGL_AUX_TBT5_IO_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_AUX_TBT5)) -#define TGL_AUX_TBT6_IO_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_AUX_TBT6)) +#define TGL_DDI_IO_D_TC1_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_IO)) +#define TGL_DDI_IO_E_TC2_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_E_IO)) +#define TGL_DDI_IO_F_TC3_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_F_IO)) +#define TGL_DDI_IO_G_TC4_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_G_IO)) +#define TGL_DDI_IO_H_TC5_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_H_IO)) +#define TGL_DDI_IO_I_TC6_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_I_IO)) + +#define TGL_AUX_A_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_IO_A) | \ + BIT_ULL(POWER_DOMAIN_AUX_A)) +#define TGL_AUX_B_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_B)) +#define TGL_AUX_C_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_C)) +#define TGL_AUX_D_TC1_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_D)) +#define TGL_AUX_E_TC2_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_E)) +#define TGL_AUX_F_TC3_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_F)) +#define TGL_AUX_G_TC4_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_G)) +#define TGL_AUX_H_TC5_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_H)) +#define TGL_AUX_I_TC6_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_I)) +#define TGL_AUX_D_TBT1_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_D_TBT)) +#define TGL_AUX_E_TBT2_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_E_TBT)) +#define TGL_AUX_F_TBT3_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_F_TBT)) +#define TGL_AUX_G_TBT4_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_G_TBT)) +#define TGL_AUX_H_TBT5_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_H_TBT)) +#define TGL_AUX_I_TBT6_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_I_TBT)) static const struct i915_power_well_ops i9xx_always_on_power_well_ops = { .sync_hw = i9xx_power_well_sync_hw_noop, @@ -3484,8 +3480,8 @@ static const struct i915_power_well_desc icl_power_wells[] = { }, }, { - .name = "AUX C", - .domains = ICL_AUX_C_IO_POWER_DOMAINS, + .name = "AUX C TC1", + .domains = ICL_AUX_C_TC1_IO_POWER_DOMAINS, .ops = &icl_tc_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3495,8 +3491,8 @@ static const struct i915_power_well_desc icl_power_wells[] = { }, }, { - .name = "AUX D", - .domains = ICL_AUX_D_IO_POWER_DOMAINS, + .name = "AUX D TC2", + .domains = ICL_AUX_D_TC2_IO_POWER_DOMAINS, .ops = &icl_tc_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3506,8 +3502,8 @@ static const struct i915_power_well_desc icl_power_wells[] = { }, }, { - .name = "AUX E", - .domains = ICL_AUX_E_IO_POWER_DOMAINS, + .name = "AUX E TC3", + .domains = ICL_AUX_E_TC3_IO_POWER_DOMAINS, .ops = &icl_tc_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3517,8 +3513,8 @@ static const struct i915_power_well_desc icl_power_wells[] = { }, }, { - .name = "AUX F", - .domains = ICL_AUX_F_IO_POWER_DOMAINS, + .name = "AUX F TC4", + .domains = ICL_AUX_F_TC4_IO_POWER_DOMAINS, .ops = &icl_tc_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3528,8 +3524,8 @@ static const struct i915_power_well_desc icl_power_wells[] = { }, }, { - .name = "AUX TBT1", - .domains = ICL_AUX_TBT1_IO_POWER_DOMAINS, + .name = "AUX C TBT1", + .domains = ICL_AUX_C_TBT1_IO_POWER_DOMAINS, .ops = &icl_tc_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3539,8 +3535,8 @@ static const struct i915_power_well_desc icl_power_wells[] = { }, }, { - .name = "AUX TBT2", - .domains = ICL_AUX_TBT2_IO_POWER_DOMAINS, + .name = "AUX D TBT2", + .domains = ICL_AUX_D_TBT2_IO_POWER_DOMAINS, .ops = &icl_tc_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3550,8 +3546,8 @@ static const struct i915_power_well_desc icl_power_wells[] = { }, }, { - .name = "AUX TBT3", - .domains = ICL_AUX_TBT3_IO_POWER_DOMAINS, + .name = "AUX E TBT3", + .domains = ICL_AUX_E_TBT3_IO_POWER_DOMAINS, .ops = &icl_tc_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3561,8 +3557,8 @@ static const struct i915_power_well_desc icl_power_wells[] = { }, }, { - .name = "AUX TBT4", - .domains = ICL_AUX_TBT4_IO_POWER_DOMAINS, + .name = "AUX F TBT4", + .domains = ICL_AUX_F_TBT4_IO_POWER_DOMAINS, .ops = &icl_tc_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3667,8 +3663,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = { } }, { - .name = "DDI TC1 IO", - .domains = TGL_DDI_IO_TC1_POWER_DOMAINS, + .name = "DDI D TC1 IO", + .domains = TGL_DDI_IO_D_TC1_POWER_DOMAINS, .ops = &hsw_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3677,8 +3673,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, }, { - .name = "DDI TC2 IO", - .domains = TGL_DDI_IO_TC2_POWER_DOMAINS, + .name = "DDI E TC2 IO", + .domains = TGL_DDI_IO_E_TC2_POWER_DOMAINS, .ops = &hsw_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3687,8 +3683,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, }, { - .name = "DDI TC3 IO", - .domains = TGL_DDI_IO_TC3_POWER_DOMAINS, + .name = "DDI F TC3 IO", + .domains = TGL_DDI_IO_F_TC3_POWER_DOMAINS, .ops = &hsw_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3697,8 +3693,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, }, { - .name = "DDI TC4 IO", - .domains = TGL_DDI_IO_TC4_POWER_DOMAINS, + .name = "DDI G TC4 IO", + .domains = TGL_DDI_IO_G_TC4_POWER_DOMAINS, .ops = &hsw_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3707,8 +3703,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, }, { - .name = "DDI TC5 IO", - .domains = TGL_DDI_IO_TC5_POWER_DOMAINS, + .name = "DDI H TC5 IO", + .domains = TGL_DDI_IO_H_TC5_POWER_DOMAINS, .ops = &hsw_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3717,8 +3713,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, }, { - .name = "DDI TC6 IO", - .domains = TGL_DDI_IO_TC6_POWER_DOMAINS, + .name = "DDI I TC6 IO", + .domains = TGL_DDI_IO_I_TC6_POWER_DOMAINS, .ops = &hsw_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3728,7 +3724,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, { .name = "AUX A", - .domains = ICL_AUX_A_IO_POWER_DOMAINS, + .domains = TGL_AUX_A_IO_POWER_DOMAINS, .ops = &icl_combo_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3738,7 +3734,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, { .name = "AUX B", - .domains = ICL_AUX_B_IO_POWER_DOMAINS, + .domains = TGL_AUX_B_IO_POWER_DOMAINS, .ops = &icl_combo_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3748,7 +3744,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, { .name = "AUX C", - .domains = ICL_AUX_C_IO_POWER_DOMAINS, + .domains = TGL_AUX_C_IO_POWER_DOMAINS, .ops = &icl_combo_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3757,8 +3753,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, }, { - .name = "AUX TC1", - .domains = TGL_AUX_TC1_IO_POWER_DOMAINS, + .name = "AUX D TC1", + .domains = TGL_AUX_D_TC1_IO_POWER_DOMAINS, .ops = &icl_tc_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3768,8 +3764,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, }, { - .name = "AUX TC2", - .domains = TGL_AUX_TC2_IO_POWER_DOMAINS, + .name = "AUX E TC2", + .domains = TGL_AUX_E_TC2_IO_POWER_DOMAINS, .ops = &icl_tc_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3779,8 +3775,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, }, { - .name = "AUX TC3", - .domains = TGL_AUX_TC3_IO_POWER_DOMAINS, + .name = "AUX F TC3", + .domains = TGL_AUX_F_TC3_IO_POWER_DOMAINS, .ops = &icl_tc_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3790,8 +3786,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, }, { - .name = "AUX TC4", - .domains = TGL_AUX_TC4_IO_POWER_DOMAINS, + .name = "AUX G TC4", + .domains = TGL_AUX_G_TC4_IO_POWER_DOMAINS, .ops = &icl_tc_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3801,8 +3797,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, }, { - .name = "AUX TC5", - .domains = TGL_AUX_TC5_IO_POWER_DOMAINS, + .name = "AUX H TC5", + .domains = TGL_AUX_H_TC5_IO_POWER_DOMAINS, .ops = &icl_tc_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3812,8 +3808,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, }, { - .name = "AUX TC6", - .domains = TGL_AUX_TC6_IO_POWER_DOMAINS, + .name = "AUX I TC6", + .domains = TGL_AUX_I_TC6_IO_POWER_DOMAINS, .ops = &icl_tc_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3823,8 +3819,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, }, { - .name = "AUX TBT1", - .domains = ICL_AUX_TBT1_IO_POWER_DOMAINS, + .name = "AUX D TBT1", + .domains = TGL_AUX_D_TBT1_IO_POWER_DOMAINS, .ops = &hsw_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3834,8 +3830,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, }, { - .name = "AUX TBT2", - .domains = ICL_AUX_TBT2_IO_POWER_DOMAINS, + .name = "AUX E TBT2", + .domains = TGL_AUX_E_TBT2_IO_POWER_DOMAINS, .ops = &hsw_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3845,8 +3841,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, }, { - .name = "AUX TBT3", - .domains = ICL_AUX_TBT3_IO_POWER_DOMAINS, + .name = "AUX F TBT3", + .domains = TGL_AUX_F_TBT3_IO_POWER_DOMAINS, .ops = &hsw_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3856,8 +3852,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, }, { - .name = "AUX TBT4", - .domains = ICL_AUX_TBT4_IO_POWER_DOMAINS, + .name = "AUX G TBT4", + .domains = TGL_AUX_G_TBT4_IO_POWER_DOMAINS, .ops = &hsw_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3867,8 +3863,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, }, { - .name = "AUX TBT5", - .domains = TGL_AUX_TBT5_IO_POWER_DOMAINS, + .name = "AUX H TBT5", + .domains = TGL_AUX_H_TBT5_IO_POWER_DOMAINS, .ops = &hsw_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3878,8 +3874,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, }, { - .name = "AUX TBT6", - .domains = TGL_AUX_TBT6_IO_POWER_DOMAINS, + .name = "AUX I TBT6", + .domains = TGL_AUX_I_TBT6_IO_POWER_DOMAINS, .ops = &hsw_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -5104,8 +5100,7 @@ static void intel_power_domains_dump_info(struct drm_i915_private *i915) for_each_power_domain(domain, power_well->desc->domains) DRM_DEBUG_DRIVER(" %-23s %d\n", - intel_display_power_domain_str(i915, - domain), + intel_display_power_domain_str(domain), power_domains->domain_use_count[domain]); } } diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h b/drivers/gpu/drm/i915/display/intel_display_power.h index a50605b8b1ad6..737b5def7fc6e 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.h +++ b/drivers/gpu/drm/i915/display/intel_display_power.h @@ -36,29 +36,20 @@ enum intel_display_power_domain { POWER_DOMAIN_PORT_DDI_B_LANES, POWER_DOMAIN_PORT_DDI_C_LANES, POWER_DOMAIN_PORT_DDI_D_LANES, - POWER_DOMAIN_PORT_DDI_TC1_LANES = POWER_DOMAIN_PORT_DDI_D_LANES, POWER_DOMAIN_PORT_DDI_E_LANES, - POWER_DOMAIN_PORT_DDI_TC2_LANES = POWER_DOMAIN_PORT_DDI_E_LANES, POWER_DOMAIN_PORT_DDI_F_LANES, - POWER_DOMAIN_PORT_DDI_TC3_LANES = POWER_DOMAIN_PORT_DDI_F_LANES, - POWER_DOMAIN_PORT_DDI_TC4_LANES, - POWER_DOMAIN_PORT_DDI_TC5_LANES, - POWER_DOMAIN_PORT_DDI_TC6_LANES, + POWER_DOMAIN_PORT_DDI_G_LANES, + POWER_DOMAIN_PORT_DDI_H_LANES, + POWER_DOMAIN_PORT_DDI_I_LANES, POWER_DOMAIN_PORT_DDI_A_IO, POWER_DOMAIN_PORT_DDI_B_IO, POWER_DOMAIN_PORT_DDI_C_IO, POWER_DOMAIN_PORT_DDI_D_IO, - POWER_DOMAIN_PORT_DDI_TC1_IO = POWER_DOMAIN_PORT_DDI_D_IO, POWER_DOMAIN_PORT_DDI_E_IO, - POWER_DOMAIN_PORT_DDI_TC2_IO = POWER_DOMAIN_PORT_DDI_E_IO, POWER_DOMAIN_PORT_DDI_F_IO, - POWER_DOMAIN_PORT_DDI_TC3_IO = POWER_DOMAIN_PORT_DDI_F_IO, POWER_DOMAIN_PORT_DDI_G_IO, - POWER_DOMAIN_PORT_DDI_TC4_IO = POWER_DOMAIN_PORT_DDI_G_IO, POWER_DOMAIN_PORT_DDI_H_IO, - POWER_DOMAIN_PORT_DDI_TC5_IO = POWER_DOMAIN_PORT_DDI_H_IO, POWER_DOMAIN_PORT_DDI_I_IO, - POWER_DOMAIN_PORT_DDI_TC6_IO = POWER_DOMAIN_PORT_DDI_I_IO, POWER_DOMAIN_PORT_DSI, POWER_DOMAIN_PORT_CRT, POWER_DOMAIN_PORT_OTHER, @@ -68,21 +59,19 @@ enum intel_display_power_domain { POWER_DOMAIN_AUX_B, POWER_DOMAIN_AUX_C, POWER_DOMAIN_AUX_D, - POWER_DOMAIN_AUX_TC1 = POWER_DOMAIN_AUX_D, POWER_DOMAIN_AUX_E, - POWER_DOMAIN_AUX_TC2 = POWER_DOMAIN_AUX_E, POWER_DOMAIN_AUX_F, - POWER_DOMAIN_AUX_TC3 = POWER_DOMAIN_AUX_F, - POWER_DOMAIN_AUX_TC4, - POWER_DOMAIN_AUX_TC5, - POWER_DOMAIN_AUX_TC6, + POWER_DOMAIN_AUX_G, + POWER_DOMAIN_AUX_H, + POWER_DOMAIN_AUX_I, POWER_DOMAIN_AUX_IO_A, - POWER_DOMAIN_AUX_TBT1, - POWER_DOMAIN_AUX_TBT2, - POWER_DOMAIN_AUX_TBT3, - POWER_DOMAIN_AUX_TBT4, - POWER_DOMAIN_AUX_TBT5, - POWER_DOMAIN_AUX_TBT6, + POWER_DOMAIN_AUX_C_TBT, + POWER_DOMAIN_AUX_D_TBT, + POWER_DOMAIN_AUX_E_TBT, + POWER_DOMAIN_AUX_F_TBT, + POWER_DOMAIN_AUX_G_TBT, + POWER_DOMAIN_AUX_H_TBT, + POWER_DOMAIN_AUX_I_TBT, POWER_DOMAIN_GMBUS, POWER_DOMAIN_MODESET, POWER_DOMAIN_GT_IRQ, @@ -269,8 +258,7 @@ void intel_display_power_suspend(struct drm_i915_private *i915); void intel_display_power_resume(struct drm_i915_private *i915); const char * -intel_display_power_domain_str(struct drm_i915_private *i915, - enum intel_display_power_domain domain); +intel_display_power_domain_str(enum intel_display_power_domain domain); bool intel_display_power_is_enabled(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain); diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 5c1a2b1e7d34b..5e81c4fc13aed 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2365,8 +2365,7 @@ static int i915_power_domain_info(struct seq_file *m, void *unused) for_each_power_domain(power_domain, power_well->desc->domains) seq_printf(m, " %-23s %d\n", - intel_display_power_domain_str(dev_priv, - power_domain), + intel_display_power_domain_str(power_domain), power_domains->domain_use_count[power_domain]); } From 0f7dc62068bb2b929e213e759dba1eb038448a5c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 26 Aug 2019 08:21:27 +0100 Subject: [PATCH 052/331] drm/i915: Protect our local workers against I915_FENCE_TIMEOUT Trust our own workers to not cause unnecessary delays and disable the automatic timeout on their asynchronous fence waits. (Along the same lines that we trust our own requests to complete eventually, if necessary by force.) Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20190826072149.9447-6-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 18865ce04e130..754a78364a633 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -924,7 +924,7 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) ret = i915_request_await_request(rq, to_request(fence)); else ret = i915_sw_fence_await_dma_fence(&rq->submit, fence, - I915_FENCE_TIMEOUT, + fence->context ? I915_FENCE_TIMEOUT : 0, I915_FENCE_GFP); if (ret < 0) return ret; From 9770f22077981cac2bd2f4dd6caa6269892cd3f7 Mon Sep 17 00:00:00 2001 From: Madhumitha Tolakanahalli Pradeep Date: Thu, 22 Aug 2019 17:46:55 -0700 Subject: [PATCH 053/331] drm/i915/tgl: Enabling DSC on Pipe A for TGL DSC was not supported on Pipe A for previous platforms. Tigerlake onwards, all the pipes support DSC. Hence, the DSC and FEC restriction on Pipe A needs to be removed. v2: Changes in the logic around removing the restriction around Pipe A (Manasi, Lucas) Cc: Manasi Navare Signed-off-by: Madhumitha Tolakanahalli Pradeep Reviewed-by: Manasi Navare Reviewed-by: Lucas De Marchi Signed-off-by: Manasi Navare Link: https://patchwork.freedesktop.org/patch/msgid/20190823004655.28905-1-madhumitha.tolakanahalli.pradeep@intel.com --- drivers/gpu/drm/i915/display/intel_dp.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index f74594d8d9df7..c9986c7a3f5ba 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -1737,8 +1737,14 @@ static bool intel_dp_source_supports_fec(struct intel_dp *intel_dp, { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); - return INTEL_GEN(dev_priv) >= 11 && - pipe_config->cpu_transcoder != TRANSCODER_A; + /* On TGL, FEC is supported on all Pipes */ + if (INTEL_GEN(dev_priv) >= 12) + return true; + + if (IS_GEN(dev_priv, 11) && pipe_config->cpu_transcoder != TRANSCODER_A) + return true; + + return false; } static bool intel_dp_supports_fec(struct intel_dp *intel_dp, @@ -1753,8 +1759,15 @@ static bool intel_dp_source_supports_dsc(struct intel_dp *intel_dp, { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); - return INTEL_GEN(dev_priv) >= 10 && - pipe_config->cpu_transcoder != TRANSCODER_A; + /* On TGL, DSC is supported on all Pipes */ + if (INTEL_GEN(dev_priv) >= 12) + return true; + + if (INTEL_GEN(dev_priv) >= 10 && + pipe_config->cpu_transcoder != TRANSCODER_A) + return true; + + return false; } static bool intel_dp_supports_dsc(struct intel_dp *intel_dp, From 074c77e3ec636fa5e2664e3c79aba397ec569761 Mon Sep 17 00:00:00 2001 From: Dhinakaran Pandiyan Date: Tue, 27 Aug 2019 01:45:16 -0700 Subject: [PATCH 054/331] drm/i915/tgl: Gen-12 display loses Yf tiling and legacy CCS support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Yf tiling was removed in gen-12, so do not expose Yf modifiers to user space. Gen-12 display also is incompatible with pre-gen12 Y-tiled CCS, so do not expose I915_FORMAT_MOD_Y_TILED_CCS. v2: Rebase to carry forward recently added gen11 formats. Cc: Ville Syrjälä Cc: Stanislav Lisovskiy Signed-off-by: Dhinakaran Pandiyan Reviewed-by: Ville Syrjälä Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20190827084516.6748-1-dhinakaran.pandiyan@intel.com --- drivers/gpu/drm/i915/display/intel_sprite.c | 86 +++++++++++++++++++-- 1 file changed, 80 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index dea63be1964f2..8ca2fbc2af1df 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -2157,6 +2157,13 @@ static const u64 skl_plane_format_modifiers_ccs[] = { DRM_FORMAT_MOD_INVALID }; +static const u64 gen12_plane_format_modifiers_noccs[] = { + I915_FORMAT_MOD_Y_TILED, + I915_FORMAT_MOD_X_TILED, + DRM_FORMAT_MOD_LINEAR, + DRM_FORMAT_MOD_INVALID +}; + static bool g4x_sprite_format_mod_supported(struct drm_plane *_plane, u32 format, u64 modifier) { @@ -2305,6 +2312,55 @@ static bool skl_plane_format_mod_supported(struct drm_plane *_plane, } } +static bool gen12_plane_format_mod_supported(struct drm_plane *_plane, + u32 format, u64 modifier) +{ + switch (modifier) { + case DRM_FORMAT_MOD_LINEAR: + case I915_FORMAT_MOD_X_TILED: + case I915_FORMAT_MOD_Y_TILED: + break; + default: + return false; + } + + switch (format) { + case DRM_FORMAT_XRGB8888: + case DRM_FORMAT_XBGR8888: + case DRM_FORMAT_ARGB8888: + case DRM_FORMAT_ABGR8888: + case DRM_FORMAT_RGB565: + case DRM_FORMAT_XRGB2101010: + case DRM_FORMAT_XBGR2101010: + case DRM_FORMAT_YUYV: + case DRM_FORMAT_YVYU: + case DRM_FORMAT_UYVY: + case DRM_FORMAT_VYUY: + case DRM_FORMAT_NV12: + case DRM_FORMAT_P010: + case DRM_FORMAT_P012: + case DRM_FORMAT_P016: + case DRM_FORMAT_XVYU2101010: + case DRM_FORMAT_C8: + case DRM_FORMAT_XBGR16161616F: + case DRM_FORMAT_ABGR16161616F: + case DRM_FORMAT_XRGB16161616F: + case DRM_FORMAT_ARGB16161616F: + case DRM_FORMAT_Y210: + case DRM_FORMAT_Y212: + case DRM_FORMAT_Y216: + case DRM_FORMAT_XVYU12_16161616: + case DRM_FORMAT_XVYU16161616: + if (modifier == DRM_FORMAT_MOD_LINEAR || + modifier == I915_FORMAT_MOD_X_TILED || + modifier == I915_FORMAT_MOD_Y_TILED) + return true; + /* fall through */ + default: + return false; + } +} + static const struct drm_plane_funcs g4x_sprite_funcs = { .update_plane = drm_atomic_helper_update_plane, .disable_plane = drm_atomic_helper_disable_plane, @@ -2341,6 +2397,15 @@ static const struct drm_plane_funcs skl_plane_funcs = { .format_mod_supported = skl_plane_format_mod_supported, }; +static const struct drm_plane_funcs gen12_plane_funcs = { + .update_plane = drm_atomic_helper_update_plane, + .disable_plane = drm_atomic_helper_disable_plane, + .destroy = intel_plane_destroy, + .atomic_duplicate_state = intel_plane_duplicate_state, + .atomic_destroy_state = intel_plane_destroy_state, + .format_mod_supported = gen12_plane_format_mod_supported, +}; + static bool skl_plane_has_fbc(struct drm_i915_private *dev_priv, enum pipe pipe, enum plane_id plane_id) { @@ -2429,6 +2494,7 @@ struct intel_plane * skl_universal_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe, enum plane_id plane_id) { + static const struct drm_plane_funcs *plane_funcs; struct intel_plane *plane; enum drm_plane_type plane_type; unsigned int supported_rotations; @@ -2471,11 +2537,19 @@ skl_universal_plane_create(struct drm_i915_private *dev_priv, formats = skl_get_plane_formats(dev_priv, pipe, plane_id, &num_formats); - plane->has_ccs = skl_plane_has_ccs(dev_priv, pipe, plane_id); - if (plane->has_ccs) - modifiers = skl_plane_format_modifiers_ccs; - else - modifiers = skl_plane_format_modifiers_noccs; + if (INTEL_GEN(dev_priv) >= 12) { + /* TODO: Implement support for gen-12 CCS modifiers */ + plane->has_ccs = false; + modifiers = gen12_plane_format_modifiers_noccs; + plane_funcs = &gen12_plane_funcs; + } else { + plane->has_ccs = skl_plane_has_ccs(dev_priv, pipe, plane_id); + if (plane->has_ccs) + modifiers = skl_plane_format_modifiers_ccs; + else + modifiers = skl_plane_format_modifiers_noccs; + plane_funcs = &skl_plane_funcs; + } if (plane_id == PLANE_PRIMARY) plane_type = DRM_PLANE_TYPE_PRIMARY; @@ -2485,7 +2559,7 @@ skl_universal_plane_create(struct drm_i915_private *dev_priv, possible_crtcs = BIT(pipe); ret = drm_universal_plane_init(&dev_priv->drm, &plane->base, - possible_crtcs, &skl_plane_funcs, + possible_crtcs, plane_funcs, formats, num_formats, modifiers, plane_type, "plane %d%c", plane_id + 1, From 99d7a74110ef4c97f921b90fa8d897c6d71610a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Fri, 23 Aug 2019 01:20:39 -0700 Subject: [PATCH 055/331] drm/i915/tgl: PSR link standby is not supported anymore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to BSpc if link standby is set on TGL+, PSR will not be enabled. Vendors should not use panels that requires link standby and even if they do, panel should assert a PSR error that will cause PSR to be disabled. BSpec: 50434 Signed-off-by: José Roberto de Souza Signed-off-by: Lucas De Marchi Reviewed-by: Anshuman Gupta Link: https://patchwork.freedesktop.org/patch/msgid/20190823082055.5992-8-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/display/intel_psr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 2af3826121fa9..629b8b98a97f1 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -1283,8 +1283,8 @@ void intel_psr_init(struct drm_i915_private *dev_priv) if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) /* HSW and BDW require workarounds that we don't implement. */ dev_priv->psr.link_standby = false; - else - /* For new platforms let's respect VBT back again */ + else if (INTEL_GEN(dev_priv) < 12) + /* For new platforms up to TGL let's respect VBT back again */ dev_priv->psr.link_standby = dev_priv->vbt.psr.full_link; INIT_WORK(&dev_priv->psr.work, intel_psr_work); From e7b6affd0baef7b3c5e2acd94236093dcf437105 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 29 Aug 2019 18:08:48 +0100 Subject: [PATCH 056/331] drm/i915/selftests: cond_resched() within the longer buddy tests Let the scheduler have a breather in between passes of the longer buddy tests. Important if we are running under kasan etc and this takes far longer than usual! Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20190829170848.969-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_buddy.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_buddy.c b/drivers/gpu/drm/i915/selftests/i915_buddy.c index 23f784eae1e7d..1b856bae67b53 100644 --- a/drivers/gpu/drm/i915/selftests/i915_buddy.c +++ b/drivers/gpu/drm/i915/selftests/i915_buddy.c @@ -375,6 +375,8 @@ static int igt_buddy_alloc_smoke(void *arg) if (err) break; + + cond_resched(); } if (err == -ENOMEM) @@ -687,6 +689,8 @@ static int igt_buddy_alloc_range(void *arg) rem -= size; if (!rem) break; + + cond_resched(); } if (err == -ENOMEM) From 9be02fde93e5b564663f10a48f19159fba718e3d Mon Sep 17 00:00:00 2001 From: Fernando Pacheco Date: Thu, 29 Aug 2019 10:41:53 -0700 Subject: [PATCH 057/331] drm/i915/uc: Extract common code from GuC stop/disable comm During normal driver unload we attempt to disable GuC communication while it is currently stopped. This results in a nop'd call to intel_guc_ct_disable within guc_disable_communication because stop/disable rely on the same flag to prevent further comms with CT. We can avoid the call to disable and still leave communication in a satisfactory state by extracting a set of shared steps from stop/disable. This set can include guc_disable_interrupts as we do not require the single caller of guc_stop_communication to be atomic: "drm/i915/selftests: Fixup atomic reset checking". This situation (stop -> disable) only occurs during intel_uc_fini_hw, so during fini, call guc_disable_communication only if currently enabled. The symmetric calls to enable/disable remain unmodified for all other scenarios. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=110943 Signed-off-by: Fernando Pacheco Cc: Chris Wilson Cc: Michal Wajdeczko Cc: Daniele Ceraolo Spurio Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190829174154.14675-1-fernando.pacheco@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 30 ++++++++++++++++----------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 71ee7ab035ccb..29a9eec60d2e0 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -224,17 +224,7 @@ static int guc_enable_communication(struct intel_guc *guc) return 0; } -static void guc_stop_communication(struct intel_guc *guc) -{ - intel_guc_ct_stop(&guc->ct); - - guc->send = intel_guc_send_nop; - guc->handler = intel_guc_to_host_event_handler_nop; - - guc_clear_mmio_msg(guc); -} - -static void guc_disable_communication(struct intel_guc *guc) +static void __guc_stop_communication(struct intel_guc *guc) { /* * Events generated during or after CT disable are logged by guc in @@ -247,6 +237,20 @@ static void guc_disable_communication(struct intel_guc *guc) guc->send = intel_guc_send_nop; guc->handler = intel_guc_to_host_event_handler_nop; +} + +static void guc_stop_communication(struct intel_guc *guc) +{ + intel_guc_ct_stop(&guc->ct); + + __guc_stop_communication(guc); + + DRM_INFO("GuC communication stopped\n"); +} + +static void guc_disable_communication(struct intel_guc *guc) +{ + __guc_stop_communication(guc); intel_guc_ct_disable(&guc->ct); @@ -537,7 +541,9 @@ void intel_uc_fini_hw(struct intel_uc *uc) if (intel_uc_supports_guc_submission(uc)) intel_guc_submission_disable(guc); - guc_disable_communication(guc); + if (guc_communication_enabled(guc)) + guc_disable_communication(guc); + __uc_sanitize(uc); } From 31444afb460ee627cb7800acee46548449c52368 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 29 Aug 2019 21:19:19 +0100 Subject: [PATCH 058/331] drm/i915: s/for_each_sgt_dma/for_each_sgt_daddr/ The sg_table for our backing store might contain addresses from stolen-memory or in the future local-memory, at which point this is no longer a dma-iterator. As a consequence we should now break on NULL iter.sgp, instead of dmap == 0 which is considered an invalid dma address. As a bonus, gcc much prefers this construct, Function old new delta gen8_ggtt_insert_entries 211 192 -19 gen6_ggtt_insert_entries 292 262 -30 i915_error_object_create 996 954 -42 Signed-off-by: Matthew Auld Cc: Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190829201919.21493-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/i915_gem_gtt.c | 4 ++-- drivers/gpu/drm/i915/i915_gem_gtt.h | 4 ++-- drivers/gpu/drm/i915/i915_gpu_error.c | 2 +- drivers/gpu/drm/i915/i915_scatterlist.h | 8 ++++---- drivers/gpu/drm/i915/selftests/i915_vma.c | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index c94dfa5622474..3c50cea76dc5c 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2208,7 +2208,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm; gtt_entries += vma->node.start / I915_GTT_PAGE_SIZE; - for_each_sgt_dma(addr, sgt_iter, vma->pages) + for_each_sgt_daddr(addr, sgt_iter, vma->pages) gen8_set_pte(gtt_entries++, pte_encode | addr); /* @@ -2249,7 +2249,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, unsigned int i = vma->node.start / I915_GTT_PAGE_SIZE; struct sgt_iter iter; dma_addr_t addr; - for_each_sgt_dma(addr, iter, vma->pages) + for_each_sgt_daddr(addr, iter, vma->pages) iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]); /* diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index b97a47fc7a68a..576f6fb95d135 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -148,8 +148,8 @@ typedef u64 gen8_pte_t; #define GEN8_PDE_IPS_64K BIT(11) #define GEN8_PDE_PS_2M BIT(7) -#define for_each_sgt_dma(__dmap, __iter, __sgt) \ - __for_each_sgt_dma(__dmap, __iter, __sgt, I915_GTT_PAGE_SIZE) +#define for_each_sgt_daddr(__dp, __iter, __sgt) \ + __for_each_sgt_daddr(__dp, __iter, __sgt, I915_GTT_PAGE_SIZE) struct intel_remapped_plane_info { /* in gtt pages */ diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 4aff342b89443..3ccf7fd9307f3 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -990,7 +990,7 @@ i915_error_object_create(struct drm_i915_private *i915, dst->unused = 0; ret = -EINVAL; - for_each_sgt_dma(dma, iter, vma->pages) { + for_each_sgt_daddr(dma, iter, vma->pages) { void __iomem *s; ggtt->vm.insert_page(&ggtt->vm, dma, slot, I915_CACHE_NONE, 0); diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h b/drivers/gpu/drm/i915/i915_scatterlist.h index 6617963df9ed2..b7b59328cb76a 100644 --- a/drivers/gpu/drm/i915/i915_scatterlist.h +++ b/drivers/gpu/drm/i915/i915_scatterlist.h @@ -67,15 +67,15 @@ static inline struct scatterlist *__sg_next(struct scatterlist *sg) } /** - * __for_each_sgt_dma - iterate over the DMA addresses of the given sg_table - * @__dmap: DMA address (output) + * __for_each_sgt_daddr - iterate over the device addresses of the given sg_table + * @__dp: Device address (output) * @__iter: 'struct sgt_iter' (iterator state, internal) * @__sgt: sg_table to iterate over (input) * @__step: step size */ -#define __for_each_sgt_dma(__dmap, __iter, __sgt, __step) \ +#define __for_each_sgt_daddr(__dp, __iter, __sgt, __step) \ for ((__iter) = __sgt_iter((__sgt)->sgl, true); \ - ((__dmap) = (__iter).dma + (__iter).curr); \ + ((__dp) = (__iter).dma + (__iter).curr), (__iter).sgp; \ (((__iter).curr += (__step)) >= (__iter).max) ? \ (__iter) = __sgt_iter(__sg_next((__iter).sgp), true), 0 : 0) diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index a5bec0a4cdcc5..97752deecccbf 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -623,7 +623,7 @@ static bool assert_partial(struct drm_i915_gem_object *obj, struct sgt_iter sgt; dma_addr_t dma; - for_each_sgt_dma(dma, sgt, vma->pages) { + for_each_sgt_daddr(dma, sgt, vma->pages) { dma_addr_t src; if (!size) { From 0c84127102ee240e3039d6e78a3dba0afff2f810 Mon Sep 17 00:00:00 2001 From: Manasi Navare Date: Tue, 27 Aug 2019 15:17:34 -0700 Subject: [PATCH 059/331] drm/i915/display: Rename update_crtcs() to commit_modeset_enables() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch has no functional changes. This just renames the update_crtcs() hooks to commit_modeset_enables() to match the drm_atomic helper naming conventions. v2: * Rebase on drm-tip Suggested-by: Daniel Vetter Cc: Ville Syrjälä Cc: Maarten Lankhorst Cc: Matt Roper Cc: Jani Nikula Reviewed-by: Maarten Lankhorst Signed-off-by: Manasi Navare Link: https://patchwork.freedesktop.org/patch/msgid/20190827221735.29351-2-manasi.d.navare@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 10 +++++----- drivers/gpu/drm/i915/i915_drv.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 4773832b0f49b..85183ef80c97e 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -13743,7 +13743,7 @@ static void intel_update_crtc(struct intel_crtc *crtc, intel_finish_crtc_commit(state, crtc); } -static void intel_update_crtcs(struct intel_atomic_state *state) +static void intel_commit_modeset_enables(struct intel_atomic_state *state) { struct intel_crtc *crtc; struct intel_crtc_state *old_crtc_state, *new_crtc_state; @@ -13758,7 +13758,7 @@ static void intel_update_crtcs(struct intel_atomic_state *state) } } -static void skl_update_crtcs(struct intel_atomic_state *state) +static void skl_commit_modeset_enables(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); struct intel_crtc *crtc; @@ -13999,7 +13999,7 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) intel_encoders_update_prepare(state); /* Now enable the clocks, plane, pipe, and connectors that we set up. */ - dev_priv->display.update_crtcs(state); + dev_priv->display.commit_modeset_enables(state); if (state->modeset) { intel_encoders_update_complete(state); @@ -15906,9 +15906,9 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) } if (INTEL_GEN(dev_priv) >= 9) - dev_priv->display.update_crtcs = skl_update_crtcs; + dev_priv->display.commit_modeset_enables = skl_commit_modeset_enables; else - dev_priv->display.update_crtcs = intel_update_crtcs; + dev_priv->display.commit_modeset_enables = intel_commit_modeset_enables; } static i915_reg_t i915_vgacntrl_reg(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b42651a387d96..75a42e8df67e1 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -288,7 +288,7 @@ struct drm_i915_display_funcs { struct intel_atomic_state *old_state); void (*crtc_disable)(struct intel_crtc_state *old_crtc_state, struct intel_atomic_state *old_state); - void (*update_crtcs)(struct intel_atomic_state *state); + void (*commit_modeset_enables)(struct intel_atomic_state *state); void (*audio_codec_enable)(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state); From 66d9cec8a6c9f3eb1d81507d5f045e28272359fb Mon Sep 17 00:00:00 2001 From: Manasi Navare Date: Wed, 28 Aug 2019 15:47:01 -0700 Subject: [PATCH 060/331] drm/i915/display: Move the commit_tail() disable sequence to separate function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create a new function intel_commit_modeset_disables() consistent with the naming in drm atomic helpers and similar to the enable function. This helps better organize the disable sequence in atomic_commit_tail() No functional change v4: * Do not create a function pointer, just a function (Maarten) v3: * Rebase (Manasi) v2: * Create a helper for old_crtc_state disables (Lucas) Suggested-by: Daniel Vetter Cc: Ville Syrjälä Cc: Maarten Lankhorst Cc: Matt Roper Cc: Jani Nikula Signed-off-by: Manasi Navare Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20190828224701.422-1-manasi.d.navare@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 92 ++++++++++++-------- drivers/gpu/drm/i915/i915_drv.h | 1 + 2 files changed, 59 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 85183ef80c97e..b38d842ff6ec6 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -13743,6 +13743,61 @@ static void intel_update_crtc(struct intel_crtc *crtc, intel_finish_crtc_commit(state, crtc); } +static void intel_old_crtc_state_disables(struct intel_atomic_state *state, + struct intel_crtc_state *old_crtc_state, + struct intel_crtc_state *new_crtc_state, + struct intel_crtc *crtc) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + + intel_crtc_disable_planes(state, crtc); + + /* + * We need to disable pipe CRC before disabling the pipe, + * or we race against vblank off. + */ + intel_crtc_disable_pipe_crc(crtc); + + dev_priv->display.crtc_disable(old_crtc_state, state); + crtc->active = false; + intel_fbc_disable(crtc); + intel_disable_shared_dpll(old_crtc_state); + + /* + * Underruns don't always raise interrupts, + * so check manually. + */ + intel_check_cpu_fifo_underruns(dev_priv); + intel_check_pch_fifo_underruns(dev_priv); + + /* FIXME unify this for all platforms */ + if (!new_crtc_state->base.active && + !HAS_GMCH(dev_priv) && + dev_priv->display.initial_watermarks) + dev_priv->display.initial_watermarks(state, + new_crtc_state); +} + +static void intel_commit_modeset_disables(struct intel_atomic_state *state) +{ + struct intel_crtc_state *new_crtc_state, *old_crtc_state; + struct intel_crtc *crtc; + int i; + + for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { + if (!needs_modeset(new_crtc_state)) + continue; + + intel_pre_plane_update(old_crtc_state, new_crtc_state); + + if (old_crtc_state->base.active) + intel_old_crtc_state_disables(state, + old_crtc_state, + new_crtc_state, + crtc); + } +} + static void intel_commit_modeset_enables(struct intel_atomic_state *state) { struct intel_crtc *crtc; @@ -13923,42 +13978,10 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) put_domains[crtc->pipe] = modeset_get_crtc_power_domains(new_crtc_state); } - - if (!needs_modeset(new_crtc_state)) - continue; - - intel_pre_plane_update(old_crtc_state, new_crtc_state); - - if (old_crtc_state->base.active) { - intel_crtc_disable_planes(state, crtc); - - /* - * We need to disable pipe CRC before disabling the pipe, - * or we race against vblank off. - */ - intel_crtc_disable_pipe_crc(crtc); - - dev_priv->display.crtc_disable(old_crtc_state, state); - crtc->active = false; - intel_fbc_disable(crtc); - intel_disable_shared_dpll(old_crtc_state); - - /* - * Underruns don't always raise - * interrupts, so check manually. - */ - intel_check_cpu_fifo_underruns(dev_priv); - intel_check_pch_fifo_underruns(dev_priv); - - /* FIXME unify this for all platforms */ - if (!new_crtc_state->base.active && - !HAS_GMCH(dev_priv) && - dev_priv->display.initial_watermarks) - dev_priv->display.initial_watermarks(state, - new_crtc_state); - } } + intel_commit_modeset_disables(state); + /* FIXME: Eventually get rid of our crtc->config pointer */ for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) crtc->config = new_crtc_state; @@ -15909,6 +15932,7 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) dev_priv->display.commit_modeset_enables = skl_commit_modeset_enables; else dev_priv->display.commit_modeset_enables = intel_commit_modeset_enables; + } static i915_reg_t i915_vgacntrl_reg(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 75a42e8df67e1..db7480831e52b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -289,6 +289,7 @@ struct drm_i915_display_funcs { void (*crtc_disable)(struct intel_crtc_state *old_crtc_state, struct intel_atomic_state *old_state); void (*commit_modeset_enables)(struct intel_atomic_state *state); + void (*commit_modeset_disables)(struct intel_atomic_state *state); void (*audio_codec_enable)(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state); From 11988e393813f520974f69bf0e55c367a1cbac48 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 29 Aug 2019 09:11:15 +0100 Subject: [PATCH 061/331] drm/i915/execlists: Try rearranging breadcrumb flush The addition of the DC_FLUSH failed to ensure sanctity of the post-sync write as CI immediately got a completion CS-event before the breadcrumb was coherent. So let's try the other idea of moving the post-sync write into the CS_STALL. References: https://bugs.freedesktop.org/show_bug.cgi?id=111514 References: e8f6b4952ec5 ("drm/i915/execlists: Flush the post-sync breadcrumb write harder") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Acked-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190829081150.10271-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 171d5205962c0..7d460b1842ddd 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -2915,20 +2915,18 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) { + cs = gen8_emit_pipe_control(cs, + PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_DC_FLUSH_ENABLE, + 0); + + /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */ cs = gen8_emit_ggtt_write_rcs(cs, request->fence.seqno, request->timeline->hwsp_offset, - PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - PIPE_CONTROL_DC_FLUSH_ENABLE); - - /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */ - /* XXX DC_FLUSH for post-sync write? (cf early context-switch bug) */ - cs = gen8_emit_pipe_control(cs, - PIPE_CONTROL_FLUSH_ENABLE | - PIPE_CONTROL_DC_FLUSH_ENABLE | - PIPE_CONTROL_CS_STALL, - 0); + PIPE_CONTROL_FLUSH_ENABLE | + PIPE_CONTROL_CS_STALL); return gen8_emit_fini_breadcrumb_footer(request, cs); } From 0dcceb35a13de07d8c28305b69a33b339c7eba0a Mon Sep 17 00:00:00 2001 From: Ramalingam C Date: Wed, 28 Aug 2019 22:12:11 +0530 Subject: [PATCH 062/331] drm/i915: mei_hdcp: I915 sends ddi index as per ME FW I915 converts it's port value into ddi index defiend by ME FW and pass it as a member of hdcp_port_data structure. Hence expose the enum mei_fw_ddi to I915 through i915_mei_interface.h. v2: Copyright years are bumped [Tomas] Signed-off-by: Ramalingam C Acked-by: Jani Nikula Reviewed-by: Shashank Sharma Acked-by: Tomas Winkler Signed-off-by: Uma Shankar Link: https://patchwork.freedesktop.org/patch/msgid/20190828164216.405-2-ramalingam.c@intel.com --- drivers/gpu/drm/i915/display/intel_hdcp.c | 17 +++++++++++- drivers/misc/mei/hdcp/mei_hdcp.c | 34 ++++++++--------------- drivers/misc/mei/hdcp/mei_hdcp.h | 12 -------- include/drm/i915_mei_hdcp_interface.h | 18 ++++++++++-- 4 files changed, 42 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index 6ec5ceeab6014..e8b04cc8fcb11 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -1,9 +1,11 @@ /* SPDX-License-Identifier: MIT */ /* * Copyright (C) 2017 Google, Inc. + * Copyright _ 2017-2019, Intel Corporation. * * Authors: * Sean Paul + * Ramalingam C */ #include @@ -1749,13 +1751,26 @@ static const struct component_ops i915_hdcp_component_ops = { .unbind = i915_hdcp_component_unbind, }; +static inline +enum mei_fw_ddi intel_get_mei_fw_ddi_index(enum port port) +{ + switch (port) { + case PORT_A: + return MEI_DDI_A; + case PORT_B ... PORT_F: + return (enum mei_fw_ddi)port; + default: + return MEI_DDI_INVALID_PORT; + } +} + static inline int initialize_hdcp_port_data(struct intel_connector *connector, const struct intel_hdcp_shim *shim) { struct intel_hdcp *hdcp = &connector->hdcp; struct hdcp_port_data *data = &hdcp->port_data; - data->port = connector->encoder->port; + data->fw_ddi = intel_get_mei_fw_ddi_index(connector->encoder->port); data->port_type = (u8)HDCP_PORT_TYPE_INTEGRATED; data->protocol = (u8)shim->protocol; diff --git a/drivers/misc/mei/hdcp/mei_hdcp.c b/drivers/misc/mei/hdcp/mei_hdcp.c index c681f6fab342d..3638c77eba268 100644 --- a/drivers/misc/mei/hdcp/mei_hdcp.c +++ b/drivers/misc/mei/hdcp/mei_hdcp.c @@ -27,18 +27,6 @@ #include "mei_hdcp.h" -static inline u8 mei_get_ddi_index(enum port port) -{ - switch (port) { - case PORT_A: - return MEI_DDI_A; - case PORT_B ... PORT_F: - return (u8)port; - default: - return MEI_DDI_INVALID_PORT; - } -} - /** * mei_hdcp_initiate_session() - Initiate a Wired HDCP2.2 Tx Session in ME FW * @dev: device corresponding to the mei_cl_device @@ -69,7 +57,7 @@ mei_hdcp_initiate_session(struct device *dev, struct hdcp_port_data *data, WIRED_CMD_BUF_LEN_INITIATE_HDCP2_SESSION_IN; session_init_in.port.integrated_port_type = data->port_type; - session_init_in.port.physical_port = mei_get_ddi_index(data->port); + session_init_in.port.physical_port = (u8)data->fw_ddi; session_init_in.protocol = data->protocol; byte = mei_cldev_send(cldev, (u8 *)&session_init_in, @@ -138,7 +126,7 @@ mei_hdcp_verify_receiver_cert_prepare_km(struct device *dev, WIRED_CMD_BUF_LEN_VERIFY_RECEIVER_CERT_IN; verify_rxcert_in.port.integrated_port_type = data->port_type; - verify_rxcert_in.port.physical_port = mei_get_ddi_index(data->port); + verify_rxcert_in.port.physical_port = (u8)data->fw_ddi; verify_rxcert_in.cert_rx = rx_cert->cert_rx; memcpy(verify_rxcert_in.r_rx, &rx_cert->r_rx, HDCP_2_2_RRX_LEN); @@ -208,7 +196,7 @@ mei_hdcp_verify_hprime(struct device *dev, struct hdcp_port_data *data, send_hprime_in.header.buffer_len = WIRED_CMD_BUF_LEN_AKE_SEND_HPRIME_IN; send_hprime_in.port.integrated_port_type = data->port_type; - send_hprime_in.port.physical_port = mei_get_ddi_index(data->port); + send_hprime_in.port.physical_port = (u8)data->fw_ddi; memcpy(send_hprime_in.h_prime, rx_hprime->h_prime, HDCP_2_2_H_PRIME_LEN); @@ -265,7 +253,7 @@ mei_hdcp_store_pairing_info(struct device *dev, struct hdcp_port_data *data, WIRED_CMD_BUF_LEN_SEND_PAIRING_INFO_IN; pairing_info_in.port.integrated_port_type = data->port_type; - pairing_info_in.port.physical_port = mei_get_ddi_index(data->port); + pairing_info_in.port.physical_port = (u8)data->fw_ddi; memcpy(pairing_info_in.e_kh_km, pairing_info->e_kh_km, HDCP_2_2_E_KH_KM_LEN); @@ -323,7 +311,7 @@ mei_hdcp_initiate_locality_check(struct device *dev, lc_init_in.header.buffer_len = WIRED_CMD_BUF_LEN_INIT_LOCALITY_CHECK_IN; lc_init_in.port.integrated_port_type = data->port_type; - lc_init_in.port.physical_port = mei_get_ddi_index(data->port); + lc_init_in.port.physical_port = (u8)data->fw_ddi; byte = mei_cldev_send(cldev, (u8 *)&lc_init_in, sizeof(lc_init_in)); if (byte < 0) { @@ -378,7 +366,7 @@ mei_hdcp_verify_lprime(struct device *dev, struct hdcp_port_data *data, WIRED_CMD_BUF_LEN_VALIDATE_LOCALITY_IN; verify_lprime_in.port.integrated_port_type = data->port_type; - verify_lprime_in.port.physical_port = mei_get_ddi_index(data->port); + verify_lprime_in.port.physical_port = (u8)data->fw_ddi; memcpy(verify_lprime_in.l_prime, rx_lprime->l_prime, HDCP_2_2_L_PRIME_LEN); @@ -435,7 +423,7 @@ static int mei_hdcp_get_session_key(struct device *dev, get_skey_in.header.buffer_len = WIRED_CMD_BUF_LEN_GET_SESSION_KEY_IN; get_skey_in.port.integrated_port_type = data->port_type; - get_skey_in.port.physical_port = mei_get_ddi_index(data->port); + get_skey_in.port.physical_port = (u8)data->fw_ddi; byte = mei_cldev_send(cldev, (u8 *)&get_skey_in, sizeof(get_skey_in)); if (byte < 0) { @@ -499,7 +487,7 @@ mei_hdcp_repeater_check_flow_prepare_ack(struct device *dev, WIRED_CMD_BUF_LEN_VERIFY_REPEATER_IN; verify_repeater_in.port.integrated_port_type = data->port_type; - verify_repeater_in.port.physical_port = mei_get_ddi_index(data->port); + verify_repeater_in.port.physical_port = (u8)data->fw_ddi; memcpy(verify_repeater_in.rx_info, rep_topology->rx_info, HDCP_2_2_RXINFO_LEN); @@ -569,7 +557,7 @@ static int mei_hdcp_verify_mprime(struct device *dev, WIRED_CMD_BUF_LEN_REPEATER_AUTH_STREAM_REQ_MIN_IN; verify_mprime_in.port.integrated_port_type = data->port_type; - verify_mprime_in.port.physical_port = mei_get_ddi_index(data->port); + verify_mprime_in.port.physical_port = (u8)data->fw_ddi; memcpy(verify_mprime_in.m_prime, stream_ready->m_prime, HDCP_2_2_MPRIME_LEN); @@ -630,7 +618,7 @@ static int mei_hdcp_enable_authentication(struct device *dev, enable_auth_in.header.buffer_len = WIRED_CMD_BUF_LEN_ENABLE_AUTH_IN; enable_auth_in.port.integrated_port_type = data->port_type; - enable_auth_in.port.physical_port = mei_get_ddi_index(data->port); + enable_auth_in.port.physical_port = (u8)data->fw_ddi; enable_auth_in.stream_type = data->streams[0].stream_type; byte = mei_cldev_send(cldev, (u8 *)&enable_auth_in, @@ -684,7 +672,7 @@ mei_hdcp_close_session(struct device *dev, struct hdcp_port_data *data) WIRED_CMD_BUF_LEN_CLOSE_SESSION_IN; session_close_in.port.integrated_port_type = data->port_type; - session_close_in.port.physical_port = mei_get_ddi_index(data->port); + session_close_in.port.physical_port = (u8)data->fw_ddi; byte = mei_cldev_send(cldev, (u8 *)&session_close_in, sizeof(session_close_in)); diff --git a/drivers/misc/mei/hdcp/mei_hdcp.h b/drivers/misc/mei/hdcp/mei_hdcp.h index e4b1cd54c853d..e60282eb2d484 100644 --- a/drivers/misc/mei/hdcp/mei_hdcp.h +++ b/drivers/misc/mei/hdcp/mei_hdcp.h @@ -362,16 +362,4 @@ struct wired_cmd_repeater_auth_stream_req_out { struct hdcp_cmd_header header; struct hdcp_port_id port; } __packed; - -enum mei_fw_ddi { - MEI_DDI_INVALID_PORT = 0x0, - - MEI_DDI_B = 1, - MEI_DDI_C, - MEI_DDI_D, - MEI_DDI_E, - MEI_DDI_F, - MEI_DDI_A = 7, - MEI_DDI_RANGE_END = MEI_DDI_A, -}; #endif /* __MEI_HDCP_H__ */ diff --git a/include/drm/i915_mei_hdcp_interface.h b/include/drm/i915_mei_hdcp_interface.h index 8c344255146aa..08670aa650d4f 100644 --- a/include/drm/i915_mei_hdcp_interface.h +++ b/include/drm/i915_mei_hdcp_interface.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: (GPL-2.0+) */ /* - * Copyright © 2017-2018 Intel Corporation + * Copyright © 2017-2019 Intel Corporation * * Authors: * Ramalingam C @@ -42,9 +42,21 @@ enum hdcp_wired_protocol { HDCP_PROTOCOL_DP }; +enum mei_fw_ddi { + MEI_DDI_INVALID_PORT = 0x0, + + MEI_DDI_B = 1, + MEI_DDI_C, + MEI_DDI_D, + MEI_DDI_E, + MEI_DDI_F, + MEI_DDI_A = 7, + MEI_DDI_RANGE_END = MEI_DDI_A, +}; + /** * struct hdcp_port_data - intel specific HDCP port data - * @port: port index as per I915 + * @fw_ddi: ddi index as per ME FW * @port_type: HDCP port type as per ME FW classification * @protocol: HDCP adaptation as per ME FW * @k: No of streams transmitted on a port. Only on DP MST this is != 1 @@ -56,7 +68,7 @@ enum hdcp_wired_protocol { * streams */ struct hdcp_port_data { - enum port port; + enum mei_fw_ddi fw_ddi; u8 port_type; u8 protocol; u16 k; From 5b6030da28cd6d6f5d129d7039ea0db83bcee6cf Mon Sep 17 00:00:00 2001 From: Ramalingam C Date: Wed, 28 Aug 2019 22:12:12 +0530 Subject: [PATCH 063/331] drm: Move port definition back to i915 header We dont need the definition of the enum port outside I915, anymore. Hence move enum port definition into I915 driver itself. v2: intel_display.h is included in intel_hdcp.h v3: enum port is declared in headers. v4: commit msg is rephrased. v5: copyright year is updated [Tomas] Signed-off-by: Ramalingam C Reviewed-by: Jani Nikula Reviewed-by: Shashank Sharma Reviewed-by: Tomas Winkler Signed-off-by: Uma Shankar Link: https://patchwork.freedesktop.org/patch/msgid/20190828164216.405-3-ramalingam.c@intel.com --- drivers/gpu/drm/i915/display/intel_bios.h | 3 ++- drivers/gpu/drm/i915/display/intel_display.h | 20 +++++++++++++++++++- drivers/gpu/drm/i915/display/intel_dp.h | 1 + drivers/gpu/drm/i915/display/intel_hdcp.h | 1 + drivers/gpu/drm/i915/display/intel_hdmi.h | 1 + drivers/gpu/drm/i915/display/intel_hotplug.h | 1 + drivers/gpu/drm/i915/display/intel_sdvo.h | 1 + include/drm/i915_drm.h | 18 ------------------ 8 files changed, 26 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bios.h b/drivers/gpu/drm/i915/display/intel_bios.h index 4969189e620f5..98f064828a576 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.h +++ b/drivers/gpu/drm/i915/display/intel_bios.h @@ -1,5 +1,5 @@ /* - * Copyright © 2016 Intel Corporation + * Copyright © 2016-2019 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -35,6 +35,7 @@ #include struct drm_i915_private; +enum port; enum intel_backlight_type { INTEL_BACKLIGHT_PMIC, diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index 03321fb4a7038..33fd523c4622b 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -1,5 +1,5 @@ /* - * Copyright © 2006-2017 Intel Corporation + * Copyright © 2006-2019 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -182,6 +182,24 @@ enum plane_id { for ((__p) = PLANE_PRIMARY; (__p) < I915_MAX_PLANES; (__p)++) \ for_each_if((__crtc)->plane_ids_mask & BIT(__p)) +enum port { + PORT_NONE = -1, + + PORT_A = 0, + PORT_B, + PORT_C, + PORT_D, + PORT_E, + PORT_F, + PORT_G, + PORT_H, + PORT_I, + + I915_MAX_PORTS +}; + +#define port_name(p) ((p) + 'A') + /* * Ports identifier referenced from other drivers. * Expected to remain stable over time diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h index 657bbb1f5ed08..e01d1f89409da 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.h +++ b/drivers/gpu/drm/i915/display/intel_dp.h @@ -13,6 +13,7 @@ #include "i915_reg.h" enum pipe; +enum port; struct drm_connector_state; struct drm_encoder; struct drm_i915_private; diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.h b/drivers/gpu/drm/i915/display/intel_hdcp.h index 13555b054930a..59a2b40405cc1 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.h +++ b/drivers/gpu/drm/i915/display/intel_hdcp.h @@ -15,6 +15,7 @@ struct drm_connector_state; struct drm_i915_private; struct intel_connector; struct intel_hdcp_shim; +enum port; void intel_hdcp_atomic_check(struct drm_connector *connector, struct drm_connector_state *old_state, diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.h b/drivers/gpu/drm/i915/display/intel_hdmi.h index 106c2e0bc3c9f..cf1ea54276391 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.h +++ b/drivers/gpu/drm/i915/display/intel_hdmi.h @@ -23,6 +23,7 @@ struct intel_crtc_state; struct intel_hdmi; struct drm_connector_state; union hdmi_infoframe; +enum port; void intel_hdmi_init(struct drm_i915_private *dev_priv, i915_reg_t hdmi_reg, enum port port); diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.h b/drivers/gpu/drm/i915/display/intel_hotplug.h index b0cd447b7fbc3..087b5f57b3218 100644 --- a/drivers/gpu/drm/i915/display/intel_hotplug.h +++ b/drivers/gpu/drm/i915/display/intel_hotplug.h @@ -13,6 +13,7 @@ struct drm_i915_private; struct intel_connector; struct intel_encoder; +enum port; void intel_hpd_poll_init(struct drm_i915_private *dev_priv); enum intel_hotplug_state intel_encoder_hotplug(struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.h b/drivers/gpu/drm/i915/display/intel_sdvo.h index c9e05bcdd1412..a66f224aa17d3 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.h +++ b/drivers/gpu/drm/i915/display/intel_sdvo.h @@ -14,6 +14,7 @@ struct drm_i915_private; enum pipe; +enum port; bool intel_sdvo_port_enabled(struct drm_i915_private *dev_priv, i915_reg_t sdvo_reg, enum pipe *pipe); diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h index 23274cf927127..6722005884db0 100644 --- a/include/drm/i915_drm.h +++ b/include/drm/i915_drm.h @@ -100,22 +100,4 @@ extern struct resource intel_graphics_stolen_res; #define INTEL_GEN11_BSM_DW1 0xc4 #define INTEL_BSM_MASK (-(1u << 20)) -enum port { - PORT_NONE = -1, - - PORT_A = 0, - PORT_B, - PORT_C, - PORT_D, - PORT_E, - PORT_F, - PORT_G, - PORT_H, - PORT_I, - - I915_MAX_PORTS -}; - -#define port_name(p) ((p) + 'A') - #endif /* _I915_DRM_H_ */ From 807c71d59a1063badc8bf60ee71470c2ff4be1cc Mon Sep 17 00:00:00 2001 From: Ramalingam C Date: Wed, 28 Aug 2019 22:12:13 +0530 Subject: [PATCH 064/331] drm: Extend I915 mei interface for transcoder info I915 needs to send the index of the transcoder as per ME FW. To support this, define enum mei_fw_tc and add as a member into the struct hdcp_port_data. v2: Typo in commit msg is fixed [Shashank] v3: kdoc is added for mei_fw_tc [Tomas] s/MEI_TC_x/MEI_TRANSCODER_x Signed-off-by: Ramalingam C Acked-by: Jani Nikula Acked-by: Tomas Winkler Reviewed-by: Shashank Sharma Signed-off-by: Uma Shankar Link: https://patchwork.freedesktop.org/patch/msgid/20190828164216.405-4-ramalingam.c@intel.com --- include/drm/i915_mei_hdcp_interface.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/include/drm/i915_mei_hdcp_interface.h b/include/drm/i915_mei_hdcp_interface.h index 08670aa650d4f..4d48de8890ca0 100644 --- a/include/drm/i915_mei_hdcp_interface.h +++ b/include/drm/i915_mei_hdcp_interface.h @@ -54,9 +54,32 @@ enum mei_fw_ddi { MEI_DDI_RANGE_END = MEI_DDI_A, }; +/** + * enum mei_fw_tc - ME Firmware defined index for transcoders + * @MEI_INVALID_TRANSCODER: Index for Invalid transcoder + * @MEI_TRANSCODER_EDP: Index for EDP Transcoder + * @MEI_TRANSCODER_DSI0: Index for DSI0 Transcoder + * @MEI_TRANSCODER_DSI1: Index for DSI1 Transcoder + * @MEI_TRANSCODER_A: Index for Transcoder A + * @MEI_TRANSCODER_B: Index for Transcoder B + * @MEI_TRANSCODER_C: Index for Transcoder C + * @MEI_TRANSCODER_D: Index for Transcoder D + */ +enum mei_fw_tc { + MEI_INVALID_TRANSCODER = 0x00, + MEI_TRANSCODER_EDP, + MEI_TRANSCODER_DSI0, + MEI_TRANSCODER_DSI1, + MEI_TRANSCODER_A = 0x10, + MEI_TRANSCODER_B, + MEI_TRANSCODER_C, + MEI_TRANSCODER_D +}; + /** * struct hdcp_port_data - intel specific HDCP port data * @fw_ddi: ddi index as per ME FW + * @fw_tc: transcoder index as per ME FW * @port_type: HDCP port type as per ME FW classification * @protocol: HDCP adaptation as per ME FW * @k: No of streams transmitted on a port. Only on DP MST this is != 1 @@ -69,6 +92,7 @@ enum mei_fw_ddi { */ struct hdcp_port_data { enum mei_fw_ddi fw_ddi; + enum mei_fw_tc fw_tc; u8 port_type; u8 protocol; u16 k; From 2d15cf1b9ae4c623b1a05838317f4c18fd534c9a Mon Sep 17 00:00:00 2001 From: Ramalingam C Date: Wed, 28 Aug 2019 22:12:14 +0530 Subject: [PATCH 065/331] misc/mei/hdcp: Fill transcoder index in port info For gen12+ platform we need to pass the transcoder info as part of the port info into ME FW. This change fills the payload for ME FW from hdcp_port_data. v2: Doc is enhanced for physical_port and attached_transcoder [Tomas] Signed-off-by: Ramalingam C Acked-by: Jani Nikula Reviewed-by: Shashank Sharma Acked-by: Tomas Winkler Signed-off-by: Uma Shankar Link: https://patchwork.freedesktop.org/patch/msgid/20190828164216.405-5-ramalingam.c@intel.com --- drivers/misc/mei/hdcp/mei_hdcp.c | 11 +++++++++++ drivers/misc/mei/hdcp/mei_hdcp.h | 5 ++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/misc/mei/hdcp/mei_hdcp.c b/drivers/misc/mei/hdcp/mei_hdcp.c index 3638c77eba268..93027fd96c716 100644 --- a/drivers/misc/mei/hdcp/mei_hdcp.c +++ b/drivers/misc/mei/hdcp/mei_hdcp.c @@ -58,6 +58,7 @@ mei_hdcp_initiate_session(struct device *dev, struct hdcp_port_data *data, session_init_in.port.integrated_port_type = data->port_type; session_init_in.port.physical_port = (u8)data->fw_ddi; + session_init_in.port.attached_transcoder = (u8)data->fw_tc; session_init_in.protocol = data->protocol; byte = mei_cldev_send(cldev, (u8 *)&session_init_in, @@ -127,6 +128,7 @@ mei_hdcp_verify_receiver_cert_prepare_km(struct device *dev, verify_rxcert_in.port.integrated_port_type = data->port_type; verify_rxcert_in.port.physical_port = (u8)data->fw_ddi; + verify_rxcert_in.port.attached_transcoder = (u8)data->fw_tc; verify_rxcert_in.cert_rx = rx_cert->cert_rx; memcpy(verify_rxcert_in.r_rx, &rx_cert->r_rx, HDCP_2_2_RRX_LEN); @@ -197,6 +199,7 @@ mei_hdcp_verify_hprime(struct device *dev, struct hdcp_port_data *data, send_hprime_in.port.integrated_port_type = data->port_type; send_hprime_in.port.physical_port = (u8)data->fw_ddi; + send_hprime_in.port.attached_transcoder = (u8)data->fw_tc; memcpy(send_hprime_in.h_prime, rx_hprime->h_prime, HDCP_2_2_H_PRIME_LEN); @@ -254,6 +257,7 @@ mei_hdcp_store_pairing_info(struct device *dev, struct hdcp_port_data *data, pairing_info_in.port.integrated_port_type = data->port_type; pairing_info_in.port.physical_port = (u8)data->fw_ddi; + pairing_info_in.port.attached_transcoder = (u8)data->fw_tc; memcpy(pairing_info_in.e_kh_km, pairing_info->e_kh_km, HDCP_2_2_E_KH_KM_LEN); @@ -312,6 +316,7 @@ mei_hdcp_initiate_locality_check(struct device *dev, lc_init_in.port.integrated_port_type = data->port_type; lc_init_in.port.physical_port = (u8)data->fw_ddi; + lc_init_in.port.attached_transcoder = (u8)data->fw_tc; byte = mei_cldev_send(cldev, (u8 *)&lc_init_in, sizeof(lc_init_in)); if (byte < 0) { @@ -367,6 +372,7 @@ mei_hdcp_verify_lprime(struct device *dev, struct hdcp_port_data *data, verify_lprime_in.port.integrated_port_type = data->port_type; verify_lprime_in.port.physical_port = (u8)data->fw_ddi; + verify_lprime_in.port.attached_transcoder = (u8)data->fw_tc; memcpy(verify_lprime_in.l_prime, rx_lprime->l_prime, HDCP_2_2_L_PRIME_LEN); @@ -424,6 +430,7 @@ static int mei_hdcp_get_session_key(struct device *dev, get_skey_in.port.integrated_port_type = data->port_type; get_skey_in.port.physical_port = (u8)data->fw_ddi; + get_skey_in.port.attached_transcoder = (u8)data->fw_tc; byte = mei_cldev_send(cldev, (u8 *)&get_skey_in, sizeof(get_skey_in)); if (byte < 0) { @@ -488,6 +495,7 @@ mei_hdcp_repeater_check_flow_prepare_ack(struct device *dev, verify_repeater_in.port.integrated_port_type = data->port_type; verify_repeater_in.port.physical_port = (u8)data->fw_ddi; + verify_repeater_in.port.attached_transcoder = (u8)data->fw_tc; memcpy(verify_repeater_in.rx_info, rep_topology->rx_info, HDCP_2_2_RXINFO_LEN); @@ -558,6 +566,7 @@ static int mei_hdcp_verify_mprime(struct device *dev, verify_mprime_in.port.integrated_port_type = data->port_type; verify_mprime_in.port.physical_port = (u8)data->fw_ddi; + verify_mprime_in.port.attached_transcoder = (u8)data->fw_tc; memcpy(verify_mprime_in.m_prime, stream_ready->m_prime, HDCP_2_2_MPRIME_LEN); @@ -619,6 +628,7 @@ static int mei_hdcp_enable_authentication(struct device *dev, enable_auth_in.port.integrated_port_type = data->port_type; enable_auth_in.port.physical_port = (u8)data->fw_ddi; + enable_auth_in.port.attached_transcoder = (u8)data->fw_tc; enable_auth_in.stream_type = data->streams[0].stream_type; byte = mei_cldev_send(cldev, (u8 *)&enable_auth_in, @@ -673,6 +683,7 @@ mei_hdcp_close_session(struct device *dev, struct hdcp_port_data *data) session_close_in.port.integrated_port_type = data->port_type; session_close_in.port.physical_port = (u8)data->fw_ddi; + session_close_in.port.attached_transcoder = (u8)data->fw_tc; byte = mei_cldev_send(cldev, (u8 *)&session_close_in, sizeof(session_close_in)); diff --git a/drivers/misc/mei/hdcp/mei_hdcp.h b/drivers/misc/mei/hdcp/mei_hdcp.h index e60282eb2d484..18ffc773fa18f 100644 --- a/drivers/misc/mei/hdcp/mei_hdcp.h +++ b/drivers/misc/mei/hdcp/mei_hdcp.h @@ -184,8 +184,11 @@ struct hdcp_cmd_no_data { /* Uniquely identifies the hdcp port being addressed for a given command. */ struct hdcp_port_id { u8 integrated_port_type; + /* physical_port is used until Gen11.5. Must be zero for Gen11.5+ */ u8 physical_port; - u16 reserved; + /* attached_transcoder is for Gen11.5+. Set to zero for Date: Wed, 28 Aug 2019 22:12:15 +0530 Subject: [PATCH 066/331] drm/i915/hdcp: update current transcoder into intel_hdcp On gen12+ platforms, HDCP HW is associated to the transcoder. Hence on every modeset update associated transcoder into the intel_hdcp of the port. v2: s/trans/cpu_transcoder [Jani] v3: comment is added for fw_ddi init for gen12+ [Shashank] only hdcp capable transcoder is translated into fw_tc [Shashank] v4: fw_tc initialization is kept for modeset. [Tomas] few extra doc is added at port_data init [Tomas] v5: Few comments are improvised [Tomas] Signed-off-by: Ramalingam C Acked-by: Jani Nikula Reviewed-by: Shashank Sharma Signed-off-by: Uma Shankar Link: https://patchwork.freedesktop.org/patch/msgid/20190828164216.405-6-ramalingam.c@intel.com --- .../drm/i915/display/intel_display_types.h | 7 +++ drivers/gpu/drm/i915/display/intel_dp.c | 3 ++ drivers/gpu/drm/i915/display/intel_hdcp.c | 47 ++++++++++++++++++- drivers/gpu/drm/i915/display/intel_hdcp.h | 3 ++ drivers/gpu/drm/i915/display/intel_hdmi.c | 3 ++ 5 files changed, 62 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 96514dcc78128..61277a87dbe7a 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -388,6 +388,13 @@ struct intel_hdcp { wait_queue_head_t cp_irq_queue; atomic_t cp_irq_count; int cp_irq_count_cached; + + /* + * HDCP register access for gen12+ need the transcoder associated. + * Transcoder attached to the connector could be changed at modeset. + * Hence caching the transcoder here. + */ + enum transcoder cpu_transcoder; }; struct intel_connector { diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index c9986c7a3f5ba..aaa90992a7f0e 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -2261,6 +2261,9 @@ intel_dp_compute_config(struct intel_encoder *encoder, intel_psr_compute_config(intel_dp, pipe_config); + intel_hdcp_transcoder_config(intel_connector, + pipe_config->cpu_transcoder); + return 0; } diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index e8b04cc8fcb11..edcec64a2c119 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -1764,13 +1764,58 @@ enum mei_fw_ddi intel_get_mei_fw_ddi_index(enum port port) } } +static inline +enum mei_fw_tc intel_get_mei_fw_tc(enum transcoder cpu_transcoder) +{ + switch (cpu_transcoder) { + case TRANSCODER_A ... TRANSCODER_D: + return (enum mei_fw_tc)(cpu_transcoder | 0x10); + default: /* eDP, DSI TRANSCODERS are non HDCP capable */ + return MEI_INVALID_TRANSCODER; + } +} + +void intel_hdcp_transcoder_config(struct intel_connector *connector, + enum transcoder cpu_transcoder) +{ + struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + struct intel_hdcp *hdcp = &connector->hdcp; + + if (!hdcp->shim) + return; + + if (INTEL_GEN(dev_priv) >= 12) { + mutex_lock(&hdcp->mutex); + hdcp->cpu_transcoder = cpu_transcoder; + hdcp->port_data.fw_tc = intel_get_mei_fw_tc(cpu_transcoder); + mutex_unlock(&hdcp->mutex); + } +} + static inline int initialize_hdcp_port_data(struct intel_connector *connector, const struct intel_hdcp_shim *shim) { + struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_hdcp *hdcp = &connector->hdcp; struct hdcp_port_data *data = &hdcp->port_data; - data->fw_ddi = intel_get_mei_fw_ddi_index(connector->encoder->port); + if (INTEL_GEN(dev_priv) < 12) + data->fw_ddi = + intel_get_mei_fw_ddi_index(connector->encoder->port); + else + /* + * As per ME FW API expectation, for GEN 12+, fw_ddi is filled + * with zero(INVALID PORT index). + */ + data->fw_ddi = MEI_DDI_INVALID_PORT; + + /* + * As associated transcoder is set and modified at modeset, here fw_tc + * is initialized to zero (invalid transcoder index). This will be + * retained for fw_tc = MEI_INVALID_TRANSCODER; + data->port_type = (u8)HDCP_PORT_TYPE_INTEGRATED; data->protocol = (u8)shim->protocol; diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.h b/drivers/gpu/drm/i915/display/intel_hdcp.h index 59a2b40405cc1..41c1053d9e38b 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.h +++ b/drivers/gpu/drm/i915/display/intel_hdcp.h @@ -16,10 +16,13 @@ struct drm_i915_private; struct intel_connector; struct intel_hdcp_shim; enum port; +enum transcoder; void intel_hdcp_atomic_check(struct drm_connector *connector, struct drm_connector_state *old_state, struct drm_connector_state *new_state); +void intel_hdcp_transcoder_config(struct intel_connector *connector, + enum transcoder cpu_transcoder); int intel_hdcp_init(struct intel_connector *connector, const struct intel_hdcp_shim *hdcp_shim); int intel_hdcp_enable(struct intel_connector *connector, u8 content_type); diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index e02f0faecf028..6e9bb6bd1ee22 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -2431,6 +2431,9 @@ int intel_hdmi_compute_config(struct intel_encoder *encoder, return -EINVAL; } + intel_hdcp_transcoder_config(intel_hdmi->attached_connector, + pipe_config->cpu_transcoder); + return 0; } From 692059318c0fc6c3584b861adc67abbb0c1598f0 Mon Sep 17 00:00:00 2001 From: Ramalingam C Date: Wed, 28 Aug 2019 22:12:16 +0530 Subject: [PATCH 067/331] drm/i915/hdcp: Enable HDCP 1.4 and 2.2 on Gen12+ >From Gen12 onwards, HDCP HW block is implemented within transcoders. Till Gen11 HDCP HW block was part of DDI. Hence required changes in HW programming is handled here. As ME FW needs the transcoder detail on which HDCP is enabled on Gen12+ platform, we are populating the detail in hdcp_port_data. v2: _MMIO_TRANS is used [Lucas and Daniel] platform check is moved into the caller [Lucas] v3: platform check is moved into a macro [Shashank] v4: Few optimizations in the coding [Shashank] v5: Fixed alignment in macro definition in i915_reg.h [Shashank] unused variables "reg" is removed. v6: Configuring the transcoder at compute_config. transcoder is used instead of pipe in macros. Rebased. v7: transcoder is cached at intel_hdcp hdcp_port_data is configured with transcoder index asper ME FW. v8: s/trans/cpu_transcoder s/tc/cpu_transcoder v9: rep_ctl is prepared for TCD too. return moved into deault of rep_ctl prepare function [Shashank] Signed-off-by: Ramalingam C Reviewed-by: Shashank Sharma Acked-by: Jani Nikula Signed-off-by: Uma Shankar Link: https://patchwork.freedesktop.org/patch/msgid/20190828164216.405-7-ramalingam.c@intel.com --- drivers/gpu/drm/i915/display/intel_hdcp.c | 152 ++++++++++++++-------- drivers/gpu/drm/i915/display/intel_hdmi.c | 10 +- drivers/gpu/drm/i915/i915_reg.h | 124 ++++++++++++++++-- 3 files changed, 221 insertions(+), 65 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index edcec64a2c119..e69fa34528df2 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -20,6 +20,7 @@ #include "intel_display_types.h" #include "intel_hdcp.h" #include "intel_sideband.h" +#include "intel_connector.h" #define KEY_LOAD_TRIES 5 #define ENCRYPT_STATUS_CHANGE_TIMEOUT_MS 50 @@ -107,24 +108,20 @@ bool intel_hdcp2_capable(struct intel_connector *connector) return capable; } -static inline bool intel_hdcp_in_use(struct intel_connector *connector) +static inline +bool intel_hdcp_in_use(struct drm_i915_private *dev_priv, + enum transcoder cpu_transcoder, enum port port) { - struct drm_i915_private *dev_priv = to_i915(connector->base.dev); - enum port port = connector->encoder->port; - u32 reg; - - reg = I915_READ(PORT_HDCP_STATUS(port)); - return reg & HDCP_STATUS_ENC; + return I915_READ(HDCP_STATUS(dev_priv, cpu_transcoder, port)) & + HDCP_STATUS_ENC; } -static inline bool intel_hdcp2_in_use(struct intel_connector *connector) +static inline +bool intel_hdcp2_in_use(struct drm_i915_private *dev_priv, + enum transcoder cpu_transcoder, enum port port) { - struct drm_i915_private *dev_priv = to_i915(connector->base.dev); - enum port port = connector->encoder->port; - u32 reg; - - reg = I915_READ(HDCP2_STATUS_DDI(port)); - return reg & LINK_ENCRYPTION_STATUS; + return I915_READ(HDCP2_STATUS(dev_priv, cpu_transcoder, port)) & + LINK_ENCRYPTION_STATUS; } static int intel_hdcp_poll_ksv_fifo(struct intel_digital_port *intel_dig_port, @@ -255,9 +252,29 @@ static int intel_write_sha_text(struct drm_i915_private *dev_priv, u32 sha_text) } static -u32 intel_hdcp_get_repeater_ctl(struct intel_digital_port *intel_dig_port) +u32 intel_hdcp_get_repeater_ctl(struct drm_i915_private *dev_priv, + enum transcoder cpu_transcoder, enum port port) { - enum port port = intel_dig_port->base.port; + if (INTEL_GEN(dev_priv) >= 12) { + switch (cpu_transcoder) { + case TRANSCODER_A: + return HDCP_TRANSA_REP_PRESENT | + HDCP_TRANSA_SHA1_M0; + case TRANSCODER_B: + return HDCP_TRANSB_REP_PRESENT | + HDCP_TRANSB_SHA1_M0; + case TRANSCODER_C: + return HDCP_TRANSC_REP_PRESENT | + HDCP_TRANSC_SHA1_M0; + case TRANSCODER_D: + return HDCP_TRANSD_REP_PRESENT | + HDCP_TRANSD_SHA1_M0; + default: + DRM_ERROR("Unknown transcoder %d\n", cpu_transcoder); + return -EINVAL; + } + } + switch (port) { case PORT_A: return HDCP_DDIA_REP_PRESENT | HDCP_DDIA_SHA1_M0; @@ -270,18 +287,20 @@ u32 intel_hdcp_get_repeater_ctl(struct intel_digital_port *intel_dig_port) case PORT_E: return HDCP_DDIE_REP_PRESENT | HDCP_DDIE_SHA1_M0; default: - break; + DRM_ERROR("Unknown port %d\n", port); + return -EINVAL; } - DRM_ERROR("Unknown port %d\n", port); - return -EINVAL; } static -int intel_hdcp_validate_v_prime(struct intel_digital_port *intel_dig_port, +int intel_hdcp_validate_v_prime(struct intel_connector *connector, const struct intel_hdcp_shim *shim, u8 *ksv_fifo, u8 num_downstream, u8 *bstatus) { + struct intel_digital_port *intel_dig_port = conn_to_dig_port(connector); struct drm_i915_private *dev_priv; + enum transcoder cpu_transcoder = connector->hdcp.cpu_transcoder; + enum port port = intel_dig_port->base.port; u32 vprime, sha_text, sha_leftovers, rep_ctl; int ret, i, j, sha_idx; @@ -308,7 +327,7 @@ int intel_hdcp_validate_v_prime(struct intel_digital_port *intel_dig_port, sha_idx = 0; sha_text = 0; sha_leftovers = 0; - rep_ctl = intel_hdcp_get_repeater_ctl(intel_dig_port); + rep_ctl = intel_hdcp_get_repeater_ctl(dev_priv, cpu_transcoder, port); I915_WRITE(HDCP_REP_CTL, rep_ctl | HDCP_SHA1_TEXT_32); for (i = 0; i < num_downstream; i++) { unsigned int sha_empty; @@ -550,7 +569,7 @@ int intel_hdcp_auth_downstream(struct intel_connector *connector) * V prime atleast twice. */ for (i = 0; i < tries; i++) { - ret = intel_hdcp_validate_v_prime(intel_dig_port, shim, + ret = intel_hdcp_validate_v_prime(connector, shim, ksv_fifo, num_downstream, bstatus); if (!ret) @@ -578,6 +597,7 @@ static int intel_hdcp_auth(struct intel_connector *connector) struct drm_device *dev = connector->base.dev; const struct intel_hdcp_shim *shim = hdcp->shim; struct drm_i915_private *dev_priv; + enum transcoder cpu_transcoder = connector->hdcp.cpu_transcoder; enum port port; unsigned long r0_prime_gen_start; int ret, i, tries = 2; @@ -617,18 +637,21 @@ static int intel_hdcp_auth(struct intel_connector *connector) /* Initialize An with 2 random values and acquire it */ for (i = 0; i < 2; i++) - I915_WRITE(PORT_HDCP_ANINIT(port), get_random_u32()); - I915_WRITE(PORT_HDCP_CONF(port), HDCP_CONF_CAPTURE_AN); + I915_WRITE(HDCP_ANINIT(dev_priv, cpu_transcoder, port), + get_random_u32()); + I915_WRITE(HDCP_CONF(dev_priv, cpu_transcoder, port), + HDCP_CONF_CAPTURE_AN); /* Wait for An to be acquired */ - if (intel_de_wait_for_set(dev_priv, PORT_HDCP_STATUS(port), + if (intel_de_wait_for_set(dev_priv, + HDCP_STATUS(dev_priv, cpu_transcoder, port), HDCP_STATUS_AN_READY, 1)) { DRM_ERROR("Timed out waiting for An\n"); return -ETIMEDOUT; } - an.reg[0] = I915_READ(PORT_HDCP_ANLO(port)); - an.reg[1] = I915_READ(PORT_HDCP_ANHI(port)); + an.reg[0] = I915_READ(HDCP_ANLO(dev_priv, cpu_transcoder, port)); + an.reg[1] = I915_READ(HDCP_ANHI(dev_priv, cpu_transcoder, port)); ret = shim->write_an_aksv(intel_dig_port, an.shim); if (ret) return ret; @@ -646,24 +669,26 @@ static int intel_hdcp_auth(struct intel_connector *connector) return -EPERM; } - I915_WRITE(PORT_HDCP_BKSVLO(port), bksv.reg[0]); - I915_WRITE(PORT_HDCP_BKSVHI(port), bksv.reg[1]); + I915_WRITE(HDCP_BKSVLO(dev_priv, cpu_transcoder, port), bksv.reg[0]); + I915_WRITE(HDCP_BKSVHI(dev_priv, cpu_transcoder, port), bksv.reg[1]); ret = shim->repeater_present(intel_dig_port, &repeater_present); if (ret) return ret; if (repeater_present) I915_WRITE(HDCP_REP_CTL, - intel_hdcp_get_repeater_ctl(intel_dig_port)); + intel_hdcp_get_repeater_ctl(dev_priv, cpu_transcoder, + port)); ret = shim->toggle_signalling(intel_dig_port, true); if (ret) return ret; - I915_WRITE(PORT_HDCP_CONF(port), HDCP_CONF_AUTH_AND_ENC); + I915_WRITE(HDCP_CONF(dev_priv, cpu_transcoder, port), + HDCP_CONF_AUTH_AND_ENC); /* Wait for R0 ready */ - if (wait_for(I915_READ(PORT_HDCP_STATUS(port)) & + if (wait_for(I915_READ(HDCP_STATUS(dev_priv, cpu_transcoder, port)) & (HDCP_STATUS_R0_READY | HDCP_STATUS_ENC), 1)) { DRM_ERROR("Timed out waiting for R0 ready\n"); return -ETIMEDOUT; @@ -691,22 +716,25 @@ static int intel_hdcp_auth(struct intel_connector *connector) ret = shim->read_ri_prime(intel_dig_port, ri.shim); if (ret) return ret; - I915_WRITE(PORT_HDCP_RPRIME(port), ri.reg); + I915_WRITE(HDCP_RPRIME(dev_priv, cpu_transcoder, port), ri.reg); /* Wait for Ri prime match */ - if (!wait_for(I915_READ(PORT_HDCP_STATUS(port)) & + if (!wait_for(I915_READ(HDCP_STATUS(dev_priv, cpu_transcoder, + port)) & (HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC), 1)) break; } if (i == tries) { DRM_DEBUG_KMS("Timed out waiting for Ri prime match (%x)\n", - I915_READ(PORT_HDCP_STATUS(port))); + I915_READ(HDCP_STATUS(dev_priv, cpu_transcoder, + port))); return -ETIMEDOUT; } /* Wait for encryption confirmation */ - if (intel_de_wait_for_set(dev_priv, PORT_HDCP_STATUS(port), + if (intel_de_wait_for_set(dev_priv, + HDCP_STATUS(dev_priv, cpu_transcoder, port), HDCP_STATUS_ENC, ENCRYPT_STATUS_CHANGE_TIMEOUT_MS)) { DRM_ERROR("Timed out waiting for encryption\n"); @@ -731,15 +759,17 @@ static int _intel_hdcp_disable(struct intel_connector *connector) struct drm_i915_private *dev_priv = connector->base.dev->dev_private; struct intel_digital_port *intel_dig_port = conn_to_dig_port(connector); enum port port = intel_dig_port->base.port; + enum transcoder cpu_transcoder = hdcp->cpu_transcoder; int ret; DRM_DEBUG_KMS("[%s:%d] HDCP is being disabled...\n", connector->base.name, connector->base.base.id); hdcp->hdcp_encrypted = false; - I915_WRITE(PORT_HDCP_CONF(port), 0); - if (intel_de_wait_for_clear(dev_priv, PORT_HDCP_STATUS(port), ~0, - ENCRYPT_STATUS_CHANGE_TIMEOUT_MS)) { + I915_WRITE(HDCP_CONF(dev_priv, cpu_transcoder, port), 0); + if (intel_de_wait_for_clear(dev_priv, + HDCP_STATUS(dev_priv, cpu_transcoder, port), + ~0, ENCRYPT_STATUS_CHANGE_TIMEOUT_MS)) { DRM_ERROR("Failed to disable HDCP, timeout clearing status\n"); return -ETIMEDOUT; } @@ -810,9 +840,11 @@ static int intel_hdcp_check_link(struct intel_connector *connector) struct drm_i915_private *dev_priv = connector->base.dev->dev_private; struct intel_digital_port *intel_dig_port = conn_to_dig_port(connector); enum port port = intel_dig_port->base.port; + enum transcoder cpu_transcoder; int ret = 0; mutex_lock(&hdcp->mutex); + cpu_transcoder = hdcp->cpu_transcoder; /* Check_link valid only when HDCP1.4 is enabled */ if (hdcp->value != DRM_MODE_CONTENT_PROTECTION_ENABLED || @@ -821,10 +853,11 @@ static int intel_hdcp_check_link(struct intel_connector *connector) goto out; } - if (WARN_ON(!intel_hdcp_in_use(connector))) { + if (WARN_ON(!intel_hdcp_in_use(dev_priv, cpu_transcoder, port))) { DRM_ERROR("%s:%d HDCP link stopped encryption,%x\n", connector->base.name, connector->base.base.id, - I915_READ(PORT_HDCP_STATUS(port))); + I915_READ(HDCP_STATUS(dev_priv, cpu_transcoder, + port))); ret = -ENXIO; hdcp->value = DRM_MODE_CONTENT_PROTECTION_DESIRED; schedule_work(&hdcp->prop_work); @@ -1495,10 +1528,11 @@ static int hdcp2_enable_encryption(struct intel_connector *connector) struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_hdcp *hdcp = &connector->hdcp; enum port port = connector->encoder->port; + enum transcoder cpu_transcoder = hdcp->cpu_transcoder; int ret; - WARN_ON(I915_READ(HDCP2_STATUS_DDI(port)) & LINK_ENCRYPTION_STATUS); - + WARN_ON(I915_READ(HDCP2_STATUS(dev_priv, cpu_transcoder, port)) & + LINK_ENCRYPTION_STATUS); if (hdcp->shim->toggle_signalling) { ret = hdcp->shim->toggle_signalling(intel_dig_port, true); if (ret) { @@ -1508,14 +1542,18 @@ static int hdcp2_enable_encryption(struct intel_connector *connector) } } - if (I915_READ(HDCP2_STATUS_DDI(port)) & LINK_AUTH_STATUS) { + if (I915_READ(HDCP2_STATUS(dev_priv, cpu_transcoder, port)) & + LINK_AUTH_STATUS) { /* Link is Authenticated. Now set for Encryption */ - I915_WRITE(HDCP2_CTL_DDI(port), - I915_READ(HDCP2_CTL_DDI(port)) | + I915_WRITE(HDCP2_CTL(dev_priv, cpu_transcoder, port), + I915_READ(HDCP2_CTL(dev_priv, cpu_transcoder, + port)) | CTL_LINK_ENCRYPTION_REQ); } - ret = intel_de_wait_for_set(dev_priv, HDCP2_STATUS_DDI(port), + ret = intel_de_wait_for_set(dev_priv, + HDCP2_STATUS(dev_priv, cpu_transcoder, + port), LINK_ENCRYPTION_STATUS, ENCRYPT_STATUS_CHANGE_TIMEOUT_MS); @@ -1528,14 +1566,19 @@ static int hdcp2_disable_encryption(struct intel_connector *connector) struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_hdcp *hdcp = &connector->hdcp; enum port port = connector->encoder->port; + enum transcoder cpu_transcoder = hdcp->cpu_transcoder; int ret; - WARN_ON(!(I915_READ(HDCP2_STATUS_DDI(port)) & LINK_ENCRYPTION_STATUS)); + WARN_ON(!(I915_READ(HDCP2_STATUS(dev_priv, cpu_transcoder, port)) & + LINK_ENCRYPTION_STATUS)); - I915_WRITE(HDCP2_CTL_DDI(port), - I915_READ(HDCP2_CTL_DDI(port)) & ~CTL_LINK_ENCRYPTION_REQ); + I915_WRITE(HDCP2_CTL(dev_priv, cpu_transcoder, port), + I915_READ(HDCP2_CTL(dev_priv, cpu_transcoder, port)) & + ~CTL_LINK_ENCRYPTION_REQ); - ret = intel_de_wait_for_clear(dev_priv, HDCP2_STATUS_DDI(port), + ret = intel_de_wait_for_clear(dev_priv, + HDCP2_STATUS(dev_priv, cpu_transcoder, + port), LINK_ENCRYPTION_STATUS, ENCRYPT_STATUS_CHANGE_TIMEOUT_MS); if (ret == -ETIMEDOUT) @@ -1634,9 +1677,11 @@ static int intel_hdcp2_check_link(struct intel_connector *connector) struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_hdcp *hdcp = &connector->hdcp; enum port port = connector->encoder->port; + enum transcoder cpu_transcoder; int ret = 0; mutex_lock(&hdcp->mutex); + cpu_transcoder = hdcp->cpu_transcoder; /* hdcp2_check_link is expected only when HDCP2.2 is Enabled */ if (hdcp->value != DRM_MODE_CONTENT_PROTECTION_ENABLED || @@ -1645,9 +1690,10 @@ static int intel_hdcp2_check_link(struct intel_connector *connector) goto out; } - if (WARN_ON(!intel_hdcp2_in_use(connector))) { + if (WARN_ON(!intel_hdcp2_in_use(dev_priv, cpu_transcoder, port))) { DRM_ERROR("HDCP2.2 link stopped the encryption, %x\n", - I915_READ(HDCP2_STATUS_DDI(port))); + I915_READ(HDCP2_STATUS(dev_priv, cpu_transcoder, + port))); ret = -ENXIO; hdcp->value = DRM_MODE_CONTENT_PROTECTION_DESIRED; schedule_work(&hdcp->prop_work); diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index 6e9bb6bd1ee22..f6bb77fd09303 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -1491,7 +1491,10 @@ bool intel_hdmi_hdcp_check_link(struct intel_digital_port *intel_dig_port) { struct drm_i915_private *dev_priv = intel_dig_port->base.base.dev->dev_private; + struct intel_connector *connector = + intel_dig_port->hdmi.attached_connector; enum port port = intel_dig_port->base.port; + enum transcoder cpu_transcoder = connector->hdcp.cpu_transcoder; int ret; union { u32 reg; @@ -1502,13 +1505,14 @@ bool intel_hdmi_hdcp_check_link(struct intel_digital_port *intel_dig_port) if (ret) return false; - I915_WRITE(PORT_HDCP_RPRIME(port), ri.reg); + I915_WRITE(HDCP_RPRIME(dev_priv, cpu_transcoder, port), ri.reg); /* Wait for Ri prime match */ - if (wait_for(I915_READ(PORT_HDCP_STATUS(port)) & + if (wait_for(I915_READ(HDCP_STATUS(dev_priv, cpu_transcoder, port)) & (HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC), 1)) { DRM_ERROR("Ri' mismatch detected, link check failed (%x)\n", - I915_READ(PORT_HDCP_STATUS(port))); + I915_READ(HDCP_STATUS(dev_priv, cpu_transcoder, + port))); return false; } return true; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 02e1ef10c47e7..3cfdab18c0cfa 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -9281,12 +9281,20 @@ enum skl_power_gate { /* HDCP Repeater Registers */ #define HDCP_REP_CTL _MMIO(0x66d00) +#define HDCP_TRANSA_REP_PRESENT BIT(31) +#define HDCP_TRANSB_REP_PRESENT BIT(30) +#define HDCP_TRANSC_REP_PRESENT BIT(29) +#define HDCP_TRANSD_REP_PRESENT BIT(28) #define HDCP_DDIB_REP_PRESENT BIT(30) #define HDCP_DDIA_REP_PRESENT BIT(29) #define HDCP_DDIC_REP_PRESENT BIT(28) #define HDCP_DDID_REP_PRESENT BIT(27) #define HDCP_DDIF_REP_PRESENT BIT(26) #define HDCP_DDIE_REP_PRESENT BIT(25) +#define HDCP_TRANSA_SHA1_M0 (1 << 20) +#define HDCP_TRANSB_SHA1_M0 (2 << 20) +#define HDCP_TRANSC_SHA1_M0 (3 << 20) +#define HDCP_TRANSD_SHA1_M0 (4 << 20) #define HDCP_DDIB_SHA1_M0 (1 << 20) #define HDCP_DDIA_SHA1_M0 (2 << 20) #define HDCP_DDIC_SHA1_M0 (3 << 20) @@ -9326,15 +9334,92 @@ enum skl_power_gate { _PORTE_HDCP_AUTHENC, \ _PORTF_HDCP_AUTHENC) + (x)) #define PORT_HDCP_CONF(port) _PORT_HDCP_AUTHENC(port, 0x0) +#define _TRANSA_HDCP_CONF 0x66400 +#define _TRANSB_HDCP_CONF 0x66500 +#define TRANS_HDCP_CONF(trans) _MMIO_TRANS(trans, _TRANSA_HDCP_CONF, \ + _TRANSB_HDCP_CONF) +#define HDCP_CONF(dev_priv, trans, port) \ + (INTEL_GEN(dev_priv) >= 12 ? \ + TRANS_HDCP_CONF(trans) : \ + PORT_HDCP_CONF(port)) + #define HDCP_CONF_CAPTURE_AN BIT(0) #define HDCP_CONF_AUTH_AND_ENC (BIT(1) | BIT(0)) #define PORT_HDCP_ANINIT(port) _PORT_HDCP_AUTHENC(port, 0x4) +#define _TRANSA_HDCP_ANINIT 0x66404 +#define _TRANSB_HDCP_ANINIT 0x66504 +#define TRANS_HDCP_ANINIT(trans) _MMIO_TRANS(trans, \ + _TRANSA_HDCP_ANINIT, \ + _TRANSB_HDCP_ANINIT) +#define HDCP_ANINIT(dev_priv, trans, port) \ + (INTEL_GEN(dev_priv) >= 12 ? \ + TRANS_HDCP_ANINIT(trans) : \ + PORT_HDCP_ANINIT(port)) + #define PORT_HDCP_ANLO(port) _PORT_HDCP_AUTHENC(port, 0x8) +#define _TRANSA_HDCP_ANLO 0x66408 +#define _TRANSB_HDCP_ANLO 0x66508 +#define TRANS_HDCP_ANLO(trans) _MMIO_TRANS(trans, _TRANSA_HDCP_ANLO, \ + _TRANSB_HDCP_ANLO) +#define HDCP_ANLO(dev_priv, trans, port) \ + (INTEL_GEN(dev_priv) >= 12 ? \ + TRANS_HDCP_ANLO(trans) : \ + PORT_HDCP_ANLO(port)) + #define PORT_HDCP_ANHI(port) _PORT_HDCP_AUTHENC(port, 0xC) +#define _TRANSA_HDCP_ANHI 0x6640C +#define _TRANSB_HDCP_ANHI 0x6650C +#define TRANS_HDCP_ANHI(trans) _MMIO_TRANS(trans, _TRANSA_HDCP_ANHI, \ + _TRANSB_HDCP_ANHI) +#define HDCP_ANHI(dev_priv, trans, port) \ + (INTEL_GEN(dev_priv) >= 12 ? \ + TRANS_HDCP_ANHI(trans) : \ + PORT_HDCP_ANHI(port)) + #define PORT_HDCP_BKSVLO(port) _PORT_HDCP_AUTHENC(port, 0x10) +#define _TRANSA_HDCP_BKSVLO 0x66410 +#define _TRANSB_HDCP_BKSVLO 0x66510 +#define TRANS_HDCP_BKSVLO(trans) _MMIO_TRANS(trans, \ + _TRANSA_HDCP_BKSVLO, \ + _TRANSB_HDCP_BKSVLO) +#define HDCP_BKSVLO(dev_priv, trans, port) \ + (INTEL_GEN(dev_priv) >= 12 ? \ + TRANS_HDCP_BKSVLO(trans) : \ + PORT_HDCP_BKSVLO(port)) + #define PORT_HDCP_BKSVHI(port) _PORT_HDCP_AUTHENC(port, 0x14) +#define _TRANSA_HDCP_BKSVHI 0x66414 +#define _TRANSB_HDCP_BKSVHI 0x66514 +#define TRANS_HDCP_BKSVHI(trans) _MMIO_TRANS(trans, \ + _TRANSA_HDCP_BKSVHI, \ + _TRANSB_HDCP_BKSVHI) +#define HDCP_BKSVHI(dev_priv, trans, port) \ + (INTEL_GEN(dev_priv) >= 12 ? \ + TRANS_HDCP_BKSVHI(trans) : \ + PORT_HDCP_BKSVHI(port)) + #define PORT_HDCP_RPRIME(port) _PORT_HDCP_AUTHENC(port, 0x18) +#define _TRANSA_HDCP_RPRIME 0x66418 +#define _TRANSB_HDCP_RPRIME 0x66518 +#define TRANS_HDCP_RPRIME(trans) _MMIO_TRANS(trans, \ + _TRANSA_HDCP_RPRIME, \ + _TRANSB_HDCP_RPRIME) +#define HDCP_RPRIME(dev_priv, trans, port) \ + (INTEL_GEN(dev_priv) >= 12 ? \ + TRANS_HDCP_RPRIME(trans) : \ + PORT_HDCP_RPRIME(port)) + #define PORT_HDCP_STATUS(port) _PORT_HDCP_AUTHENC(port, 0x1C) +#define _TRANSA_HDCP_STATUS 0x6641C +#define _TRANSB_HDCP_STATUS 0x6651C +#define TRANS_HDCP_STATUS(trans) _MMIO_TRANS(trans, \ + _TRANSA_HDCP_STATUS, \ + _TRANSB_HDCP_STATUS) +#define HDCP_STATUS(dev_priv, trans, port) \ + (INTEL_GEN(dev_priv) >= 12 ? \ + TRANS_HDCP_STATUS(trans) : \ + PORT_HDCP_STATUS(port)) + #define HDCP_STATUS_STREAM_A_ENC BIT(31) #define HDCP_STATUS_STREAM_B_ENC BIT(30) #define HDCP_STATUS_STREAM_C_ENC BIT(29) @@ -9361,23 +9446,44 @@ enum skl_power_gate { _PORTD_HDCP2_BASE, \ _PORTE_HDCP2_BASE, \ _PORTF_HDCP2_BASE) + (x)) - -#define HDCP2_AUTH_DDI(port) _PORT_HDCP2_BASE(port, 0x98) +#define PORT_HDCP2_AUTH(port) _PORT_HDCP2_BASE(port, 0x98) +#define _TRANSA_HDCP2_AUTH 0x66498 +#define _TRANSB_HDCP2_AUTH 0x66598 +#define TRANS_HDCP2_AUTH(trans) _MMIO_TRANS(trans, _TRANSA_HDCP2_AUTH, \ + _TRANSB_HDCP2_AUTH) #define AUTH_LINK_AUTHENTICATED BIT(31) #define AUTH_LINK_TYPE BIT(30) #define AUTH_FORCE_CLR_INPUTCTR BIT(19) #define AUTH_CLR_KEYS BIT(18) - -#define HDCP2_CTL_DDI(port) _PORT_HDCP2_BASE(port, 0xB0) +#define HDCP2_AUTH(dev_priv, trans, port) \ + (INTEL_GEN(dev_priv) >= 12 ? \ + TRANS_HDCP2_AUTH(trans) : \ + PORT_HDCP2_AUTH(port)) + +#define PORT_HDCP2_CTL(port) _PORT_HDCP2_BASE(port, 0xB0) +#define _TRANSA_HDCP2_CTL 0x664B0 +#define _TRANSB_HDCP2_CTL 0x665B0 +#define TRANS_HDCP2_CTL(trans) _MMIO_TRANS(trans, _TRANSA_HDCP2_CTL, \ + _TRANSB_HDCP2_CTL) #define CTL_LINK_ENCRYPTION_REQ BIT(31) - -#define HDCP2_STATUS_DDI(port) _PORT_HDCP2_BASE(port, 0xB4) -#define STREAM_ENCRYPTION_STATUS_A BIT(31) -#define STREAM_ENCRYPTION_STATUS_B BIT(30) -#define STREAM_ENCRYPTION_STATUS_C BIT(29) +#define HDCP2_CTL(dev_priv, trans, port) \ + (INTEL_GEN(dev_priv) >= 12 ? \ + TRANS_HDCP2_CTL(trans) : \ + PORT_HDCP2_CTL(port)) + +#define PORT_HDCP2_STATUS(port) _PORT_HDCP2_BASE(port, 0xB4) +#define _TRANSA_HDCP2_STATUS 0x664B4 +#define _TRANSB_HDCP2_STATUS 0x665B4 +#define TRANS_HDCP2_STATUS(trans) _MMIO_TRANS(trans, \ + _TRANSA_HDCP2_STATUS, \ + _TRANSB_HDCP2_STATUS) #define LINK_TYPE_STATUS BIT(22) #define LINK_AUTH_STATUS BIT(21) #define LINK_ENCRYPTION_STATUS BIT(20) +#define HDCP2_STATUS(dev_priv, trans, port) \ + (INTEL_GEN(dev_priv) >= 12 ? \ + TRANS_HDCP2_STATUS(trans) : \ + PORT_HDCP2_STATUS(port)) /* Per-pipe DDI Function Control */ #define _TRANS_DDI_FUNC_CTL_A 0x60400 From b047463c852272ef9956ad3a4c706f78f8b06c17 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Fri, 30 Aug 2019 11:58:48 +0300 Subject: [PATCH 068/331] drm/i915: Remove link to missing "Batchbuffer Pools" documentation The referenced documentation section has been removed. Remove the link to avoid warning when building the documentation. Signed-off-by: Joonas Lahtinen Cc: Chris Wilson Cc: Matthew Auld Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190830085849.12519-1-joonas.lahtinen@linux.intel.com --- Documentation/gpu/i915.rst | 9 --------- 1 file changed, 9 deletions(-) diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index 3415255ad3dca..7d1f656128566 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -358,15 +358,6 @@ Batchbuffer Parsing .. kernel-doc:: drivers/gpu/drm/i915/i915_cmd_parser.c :internal: -Batchbuffer Pools ------------------ - -.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_batch_pool.c - :doc: batch pool - -.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_batch_pool.c - :internal: - User Batchbuffer Execution -------------------------- From 4072761b981c630acbc6195391e5a22bae114e39 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Fri, 30 Aug 2019 11:58:49 +0300 Subject: [PATCH 069/331] drm/i915: Indent GuC/WOPCM documentation sections Indent GuC/WOPCM documentation correctly to reside under "Memory Management and Command Submission" section to avoid it escaping to the upper level navigation. Signed-off-by: Joonas Lahtinen Cc: Michal Wajdeczko Cc: Daniele Ceraolo Spurio Cc: Chris Wilson Acked-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190830085849.12519-2-joonas.lahtinen@linux.intel.com --- Documentation/gpu/i915.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index 7d1f656128566..e249ea7b0ec70 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -407,31 +407,31 @@ Object Tiling IOCTLs :doc: buffer object tiling WOPCM -===== +----- WOPCM Layout ------------- +~~~~~~~~~~~~ .. kernel-doc:: drivers/gpu/drm/i915/intel_wopcm.c :doc: WOPCM Layout GuC -=== +--- Firmware Layout -------------------- +~~~~~~~~~~~~~~~ .. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h :doc: Firmware Layout GuC-specific firmware loader ----------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c :internal: GuC-based command submission ----------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c :doc: GuC-based command submission @@ -440,7 +440,7 @@ GuC-based command submission :internal: GuC Address Space ------------------ +~~~~~~~~~~~~~~~~~ .. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_guc.c :doc: GuC Address Space From ed3126fa0d393825f2f134668b31363581035825 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 29 Aug 2019 14:15:23 -0700 Subject: [PATCH 070/331] drm/i915: parameterize south hpd macros MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit South, follow the north. Instead of defining separate macros for each port, make them take port as parameter as done for TC ports and for north engine. This will allow us to easily extend this as needed. tgp_ddi_port_hotplug_long_detect() is also removed as after the EHL introduction the tgp variant is an exact copy of icp. Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20190829211526.30525-1-jose.souza@intel.com Signed-off-by: José Roberto de Souza --- drivers/gpu/drm/i915/i915_irq.c | 22 ++++---------------- drivers/gpu/drm/i915/i915_reg.h | 36 +++++++++++---------------------- 2 files changed, 16 insertions(+), 42 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 3f1b6ee157baf..084e322ec15b9 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1401,11 +1401,11 @@ static bool icp_ddi_port_hotplug_long_detect(enum hpd_pin pin, u32 val) { switch (pin) { case HPD_PORT_A: - return val & ICP_DDIA_HPD_LONG_DETECT; + return val & SHOTPLUG_CTL_DDI_HPD_LONG_DETECT(PORT_A); case HPD_PORT_B: - return val & ICP_DDIB_HPD_LONG_DETECT; + return val & SHOTPLUG_CTL_DDI_HPD_LONG_DETECT(PORT_B); case HPD_PORT_C: - return val & TGP_DDIC_HPD_LONG_DETECT; + return val & SHOTPLUG_CTL_DDI_HPD_LONG_DETECT(PORT_C); default: return false; } @@ -1427,20 +1427,6 @@ static bool icp_tc_port_hotplug_long_detect(enum hpd_pin pin, u32 val) } } -static bool tgp_ddi_port_hotplug_long_detect(enum hpd_pin pin, u32 val) -{ - switch (pin) { - case HPD_PORT_A: - return val & ICP_DDIA_HPD_LONG_DETECT; - case HPD_PORT_B: - return val & ICP_DDIB_HPD_LONG_DETECT; - case HPD_PORT_C: - return val & TGP_DDIC_HPD_LONG_DETECT; - default: - return false; - } -} - static bool tgp_tc_port_hotplug_long_detect(enum hpd_pin pin, u32 val) { switch (pin) { @@ -2318,7 +2304,7 @@ static void tgp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, ddi_hotplug_trigger, dig_hotplug_reg, hpd_tgp, - tgp_ddi_port_hotplug_long_detect); + icp_ddi_port_hotplug_long_detect); } if (tc_hotplug_trigger) { diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 3cfdab18c0cfa..946beb9e295e8 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7928,26 +7928,13 @@ enum { * SHOTPLUG_CTL_DDI and SHOTPLUG_CTL_TC. */ -#define SHOTPLUG_CTL_DDI _MMIO(0xc4030) -#define TGP_DDIC_HPD_ENABLE (1 << 11) -#define TGP_DDIC_HPD_STATUS_MASK (3 << 8) -#define TGP_DDIC_HPD_NO_DETECT (0 << 8) -#define TGP_DDIC_HPD_SHORT_DETECT (1 << 8) -#define TGP_DDIC_HPD_LONG_DETECT (2 << 8) -#define TGP_DDIC_HPD_SHORT_LONG_DETECT (3 << 8) -#define ICP_DDIB_HPD_ENABLE (1 << 7) -#define ICP_DDIB_HPD_STATUS_MASK (3 << 4) -#define ICP_DDIB_HPD_NO_DETECT (0 << 4) -#define ICP_DDIB_HPD_SHORT_DETECT (1 << 4) -#define ICP_DDIB_HPD_LONG_DETECT (2 << 4) -#define ICP_DDIB_HPD_SHORT_LONG_DETECT (3 << 4) -#define ICP_DDIA_HPD_ENABLE (1 << 3) -#define ICP_DDIA_HPD_OP_DRIVE_1 (1 << 2) -#define ICP_DDIA_HPD_STATUS_MASK (3 << 0) -#define ICP_DDIA_HPD_NO_DETECT (0 << 0) -#define ICP_DDIA_HPD_SHORT_DETECT (1 << 0) -#define ICP_DDIA_HPD_LONG_DETECT (2 << 0) -#define ICP_DDIA_HPD_SHORT_LONG_DETECT (3 << 0) +#define SHOTPLUG_CTL_DDI _MMIO(0xc4030) +#define SHOTPLUG_CTL_DDI_HPD_ENABLE(port) (0x8 << (4 * (port))) +#define SHOTPLUG_CTL_DDI_HPD_STATUS_MASK(port) (0x3 << (4 * (port))) +#define SHOTPLUG_CTL_DDI_HPD_NO_DETECT(port) (0x0 << (4 * (port))) +#define SHOTPLUG_CTL_DDI_HPD_SHORT_DETECT(port) (0x1 << (4 * (port))) +#define SHOTPLUG_CTL_DDI_HPD_LONG_DETECT(port) (0x2 << (4 * (port))) +#define SHOTPLUG_CTL_DDI_HPD_SHORT_LONG_DETECT(port) (0x3 << (4 * (port))) #define SHOTPLUG_CTL_TC _MMIO(0xc4034) #define ICP_TC_HPD_ENABLE(tc_port) (8 << (tc_port) * 4) @@ -8058,14 +8045,15 @@ enum { #define ICP_TC_HPD_LONG_DETECT(tc_port) (2 << (tc_port) * 4) #define ICP_TC_HPD_SHORT_DETECT(tc_port) (1 << (tc_port) * 4) -#define ICP_DDI_HPD_ENABLE_MASK (ICP_DDIB_HPD_ENABLE | \ - ICP_DDIA_HPD_ENABLE) +#define ICP_DDI_HPD_ENABLE_MASK (SHOTPLUG_CTL_DDI_HPD_ENABLE(PORT_B) | \ + SHOTPLUG_CTL_DDI_HPD_ENABLE(PORT_A)) #define ICP_TC_HPD_ENABLE_MASK (ICP_TC_HPD_ENABLE(PORT_TC4) | \ ICP_TC_HPD_ENABLE(PORT_TC3) | \ ICP_TC_HPD_ENABLE(PORT_TC2) | \ ICP_TC_HPD_ENABLE(PORT_TC1)) -#define TGP_DDI_HPD_ENABLE_MASK (TGP_DDIC_HPD_ENABLE | \ - ICP_DDI_HPD_ENABLE_MASK) +#define TGP_DDI_HPD_ENABLE_MASK (SHOTPLUG_CTL_DDI_HPD_ENABLE(PORT_C) | \ + SHOTPLUG_CTL_DDI_HPD_ENABLE(PORT_B) | \ + SHOTPLUG_CTL_DDI_HPD_ENABLE(PORT_A)) #define TGP_TC_HPD_ENABLE_MASK (ICP_TC_HPD_ENABLE(PORT_TC6) | \ ICP_TC_HPD_ENABLE(PORT_TC5) | \ ICP_TC_HPD_ENABLE_MASK) From 58676af69c2eb1e0202215fcfc1d421f186226ed Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 29 Aug 2019 14:15:24 -0700 Subject: [PATCH 071/331] drm/i915: unify icp, tgp and mcc irq handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The differences are only on the pins, trigger and long_detect function. The MCC handling is already partially merged, so merge TGP as well. Remove the pins argument from icp_irq_handler() so we have all the differences between the 3 set in a common if ladder. Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20190829211526.30525-2-jose.souza@intel.com Signed-off-by: José Roberto de Souza --- drivers/gpu/drm/i915/i915_irq.c | 65 ++++++++------------------------- 1 file changed, 16 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 084e322ec15b9..5f590987dcd58 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2243,19 +2243,27 @@ static void cpt_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) cpt_serr_int_handler(dev_priv); } -static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir, - const u32 *pins) +static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) { - u32 ddi_hotplug_trigger; - u32 tc_hotplug_trigger; + u32 ddi_hotplug_trigger, tc_hotplug_trigger; u32 pin_mask = 0, long_mask = 0; + bool (*tc_port_hotplug_long_detect)(enum hpd_pin pin, u32 val); + const u32 *pins; - if (HAS_PCH_MCC(dev_priv)) { + if (HAS_PCH_TGP(dev_priv)) { + ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_TGP; + tc_hotplug_trigger = pch_iir & SDE_TC_MASK_TGP; + tc_port_hotplug_long_detect = tgp_tc_port_hotplug_long_detect; + pins = hpd_tgp; + } else if (HAS_PCH_MCC(dev_priv)) { ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_TGP; tc_hotplug_trigger = 0; + pins = hpd_mcc; } else { ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP; tc_hotplug_trigger = pch_iir & SDE_TC_MASK_ICP; + tc_port_hotplug_long_detect = icp_tc_port_hotplug_long_detect; + pins = hpd_icp; } if (ddi_hotplug_trigger) { @@ -2279,44 +2287,7 @@ static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir, intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, tc_hotplug_trigger, dig_hotplug_reg, pins, - icp_tc_port_hotplug_long_detect); - } - - if (pin_mask) - intel_hpd_irq_handler(dev_priv, pin_mask, long_mask); - - if (pch_iir & SDE_GMBUS_ICP) - gmbus_irq_handler(dev_priv); -} - -static void tgp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) -{ - u32 ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_TGP; - u32 tc_hotplug_trigger = pch_iir & SDE_TC_MASK_TGP; - u32 pin_mask = 0, long_mask = 0; - - if (ddi_hotplug_trigger) { - u32 dig_hotplug_reg; - - dig_hotplug_reg = I915_READ(SHOTPLUG_CTL_DDI); - I915_WRITE(SHOTPLUG_CTL_DDI, dig_hotplug_reg); - - intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, - ddi_hotplug_trigger, - dig_hotplug_reg, hpd_tgp, - icp_ddi_port_hotplug_long_detect); - } - - if (tc_hotplug_trigger) { - u32 dig_hotplug_reg; - - dig_hotplug_reg = I915_READ(SHOTPLUG_CTL_TC); - I915_WRITE(SHOTPLUG_CTL_TC, dig_hotplug_reg); - - intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, - tc_hotplug_trigger, - dig_hotplug_reg, hpd_tgp, - tgp_tc_port_hotplug_long_detect); + tc_port_hotplug_long_detect); } if (pin_mask) @@ -2767,12 +2738,8 @@ gen8_de_irq_handler(struct drm_i915_private *dev_priv, u32 master_ctl) I915_WRITE(SDEIIR, iir); ret = IRQ_HANDLED; - if (INTEL_PCH_TYPE(dev_priv) >= PCH_TGP) - tgp_irq_handler(dev_priv, iir); - else if (INTEL_PCH_TYPE(dev_priv) >= PCH_MCC) - icp_irq_handler(dev_priv, iir, hpd_mcc); - else if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) - icp_irq_handler(dev_priv, iir, hpd_icp); + if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) + icp_irq_handler(dev_priv, iir); else if (INTEL_PCH_TYPE(dev_priv) >= PCH_SPT) spt_irq_handler(dev_priv, iir); else From b32821c036310c49cdc45639b1d451b21fc3f17b Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 29 Aug 2019 14:15:25 -0700 Subject: [PATCH 072/331] drm/i915: parameterize SDE hotplug registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ice Lake, Tiger Lake and Elkhart Lake all have different port configurations and all of them can be parameterized the same way to form the SDE hotplug bitmask. Avoid making them a special case an just use the parameterized macros. Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20190829211526.30525-3-jose.souza@intel.com Signed-off-by: José Roberto de Souza --- drivers/gpu/drm/i915/i915_irq.c | 36 ++++++++++++++++----------------- drivers/gpu/drm/i915/i915_reg.h | 35 ++++++++++++++------------------ 2 files changed, 33 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 5f590987dcd58..5413828321260 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -149,30 +149,30 @@ static const u32 hpd_gen12[HPD_NUM_PINS] = { }; static const u32 hpd_icp[HPD_NUM_PINS] = { - [HPD_PORT_A] = SDE_DDIA_HOTPLUG_ICP, - [HPD_PORT_B] = SDE_DDIB_HOTPLUG_ICP, - [HPD_PORT_C] = SDE_TC1_HOTPLUG_ICP, - [HPD_PORT_D] = SDE_TC2_HOTPLUG_ICP, - [HPD_PORT_E] = SDE_TC3_HOTPLUG_ICP, - [HPD_PORT_F] = SDE_TC4_HOTPLUG_ICP + [HPD_PORT_A] = SDE_DDI_HOTPLUG_ICP(PORT_A), + [HPD_PORT_B] = SDE_DDI_HOTPLUG_ICP(PORT_B), + [HPD_PORT_C] = SDE_TC_HOTPLUG_ICP(PORT_TC1), + [HPD_PORT_D] = SDE_TC_HOTPLUG_ICP(PORT_TC2), + [HPD_PORT_E] = SDE_TC_HOTPLUG_ICP(PORT_TC3), + [HPD_PORT_F] = SDE_TC_HOTPLUG_ICP(PORT_TC4), }; static const u32 hpd_mcc[HPD_NUM_PINS] = { - [HPD_PORT_A] = SDE_DDIA_HOTPLUG_ICP, - [HPD_PORT_B] = SDE_DDIB_HOTPLUG_ICP, - [HPD_PORT_C] = SDE_TC1_HOTPLUG_ICP + [HPD_PORT_A] = SDE_DDI_HOTPLUG_ICP(PORT_A), + [HPD_PORT_B] = SDE_DDI_HOTPLUG_ICP(PORT_B), + [HPD_PORT_C] = SDE_TC_HOTPLUG_ICP(PORT_TC1), }; static const u32 hpd_tgp[HPD_NUM_PINS] = { - [HPD_PORT_A] = SDE_DDIA_HOTPLUG_ICP, - [HPD_PORT_B] = SDE_DDIB_HOTPLUG_ICP, - [HPD_PORT_C] = SDE_DDIC_HOTPLUG_TGP, - [HPD_PORT_D] = SDE_TC1_HOTPLUG_ICP, - [HPD_PORT_E] = SDE_TC2_HOTPLUG_ICP, - [HPD_PORT_F] = SDE_TC3_HOTPLUG_ICP, - [HPD_PORT_G] = SDE_TC4_HOTPLUG_ICP, - [HPD_PORT_H] = SDE_TC5_HOTPLUG_TGP, - [HPD_PORT_I] = SDE_TC6_HOTPLUG_TGP, + [HPD_PORT_A] = SDE_DDI_HOTPLUG_ICP(PORT_A), + [HPD_PORT_B] = SDE_DDI_HOTPLUG_ICP(PORT_B), + [HPD_PORT_C] = SDE_DDI_HOTPLUG_ICP(PORT_C), + [HPD_PORT_D] = SDE_TC_HOTPLUG_ICP(PORT_TC1), + [HPD_PORT_E] = SDE_TC_HOTPLUG_ICP(PORT_TC2), + [HPD_PORT_F] = SDE_TC_HOTPLUG_ICP(PORT_TC3), + [HPD_PORT_G] = SDE_TC_HOTPLUG_ICP(PORT_TC4), + [HPD_PORT_H] = SDE_TC_HOTPLUG_ICP(PORT_TC5), + [HPD_PORT_I] = SDE_TC_HOTPLUG_ICP(PORT_TC6), }; void gen3_irq_reset(struct intel_uncore *uncore, i915_reg_t imr, diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 946beb9e295e8..53735433fe203 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7839,29 +7839,24 @@ enum { SDE_FDI_RXA_CPT) /* south display engine interrupt: ICP/TGP */ -#define SDE_TC6_HOTPLUG_TGP (1 << 29) -#define SDE_TC5_HOTPLUG_TGP (1 << 28) -#define SDE_TC4_HOTPLUG_ICP (1 << 27) -#define SDE_TC3_HOTPLUG_ICP (1 << 26) -#define SDE_TC2_HOTPLUG_ICP (1 << 25) -#define SDE_TC1_HOTPLUG_ICP (1 << 24) #define SDE_GMBUS_ICP (1 << 23) -#define SDE_DDIC_HOTPLUG_TGP (1 << 18) -#define SDE_DDIB_HOTPLUG_ICP (1 << 17) -#define SDE_DDIA_HOTPLUG_ICP (1 << 16) #define SDE_TC_HOTPLUG_ICP(tc_port) (1 << ((tc_port) + 24)) #define SDE_DDI_HOTPLUG_ICP(port) (1 << ((port) + 16)) -#define SDE_DDI_MASK_ICP (SDE_DDIB_HOTPLUG_ICP | \ - SDE_DDIA_HOTPLUG_ICP) -#define SDE_TC_MASK_ICP (SDE_TC4_HOTPLUG_ICP | \ - SDE_TC3_HOTPLUG_ICP | \ - SDE_TC2_HOTPLUG_ICP | \ - SDE_TC1_HOTPLUG_ICP) -#define SDE_DDI_MASK_TGP (SDE_DDIC_HOTPLUG_TGP | \ - SDE_DDI_MASK_ICP) -#define SDE_TC_MASK_TGP (SDE_TC6_HOTPLUG_TGP | \ - SDE_TC5_HOTPLUG_TGP | \ - SDE_TC_MASK_ICP) +#define SDE_DDI_MASK_ICP (SDE_DDI_HOTPLUG_ICP(PORT_B) | \ + SDE_DDI_HOTPLUG_ICP(PORT_A)) +#define SDE_TC_MASK_ICP (SDE_TC_HOTPLUG_ICP(PORT_TC4) | \ + SDE_TC_HOTPLUG_ICP(PORT_TC3) | \ + SDE_TC_HOTPLUG_ICP(PORT_TC2) | \ + SDE_TC_HOTPLUG_ICP(PORT_TC1)) +#define SDE_DDI_MASK_TGP (SDE_DDI_HOTPLUG_ICP(PORT_C) | \ + SDE_DDI_HOTPLUG_ICP(PORT_B) | \ + SDE_DDI_HOTPLUG_ICP(PORT_A)) +#define SDE_TC_MASK_TGP (SDE_TC_HOTPLUG_ICP(PORT_TC6) | \ + SDE_TC_HOTPLUG_ICP(PORT_TC5) | \ + SDE_TC_HOTPLUG_ICP(PORT_TC4) | \ + SDE_TC_HOTPLUG_ICP(PORT_TC3) | \ + SDE_TC_HOTPLUG_ICP(PORT_TC2) | \ + SDE_TC_HOTPLUG_ICP(PORT_TC1)) #define SDEISR _MMIO(0xc4000) #define SDEIMR _MMIO(0xc4004) From 40e98130c32889e4ed128d129b824107b90501fe Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 29 Aug 2019 14:15:26 -0700 Subject: [PATCH 073/331] drm/i915: unify icp, tgp and mcc irq setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use a single function to setup the SDE irq and make MCC, ICP and TGP use it, just like was done for the irq handler. Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20190829211526.30525-4-jose.souza@intel.com Signed-off-by: José Roberto de Souza --- drivers/gpu/drm/i915/i915_irq.c | 50 ++++++++++++++------------------- 1 file changed, 21 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 5413828321260..135c9ee55e078 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3385,42 +3385,31 @@ static void icp_hpd_detection_setup(struct drm_i915_private *dev_priv, } } -static void icp_hpd_irq_setup(struct drm_i915_private *dev_priv) +static void icp_hpd_irq_setup(struct drm_i915_private *dev_priv, + u32 sde_ddi_mask, u32 sde_tc_mask, + u32 ddi_enable_mask, u32 tc_enable_mask, + const u32 *pins) { u32 hotplug_irqs, enabled_irqs; - hotplug_irqs = SDE_DDI_MASK_ICP | SDE_TC_MASK_ICP; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_icp); + hotplug_irqs = sde_ddi_mask | sde_tc_mask; + enabled_irqs = intel_hpd_enabled_irqs(dev_priv, pins); ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs); - icp_hpd_detection_setup(dev_priv, ICP_DDI_HPD_ENABLE_MASK, - ICP_TC_HPD_ENABLE_MASK); + icp_hpd_detection_setup(dev_priv, ddi_enable_mask, tc_enable_mask); } +/* + * EHL doesn't need most of gen11_hpd_irq_setup, it's handling only the + * equivalent of SDE. + */ static void mcc_hpd_irq_setup(struct drm_i915_private *dev_priv) { - u32 hotplug_irqs, enabled_irqs; - - hotplug_irqs = SDE_DDI_MASK_TGP; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_mcc); - - ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs); - - icp_hpd_detection_setup(dev_priv, TGP_DDI_HPD_ENABLE_MASK, 0); -} - -static void tgp_hpd_irq_setup(struct drm_i915_private *dev_priv) -{ - u32 hotplug_irqs, enabled_irqs; - - hotplug_irqs = SDE_DDI_MASK_TGP | SDE_TC_MASK_TGP; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_tgp); - - ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs); - - icp_hpd_detection_setup(dev_priv, TGP_DDI_HPD_ENABLE_MASK, - TGP_TC_HPD_ENABLE_MASK); + icp_hpd_irq_setup(dev_priv, + SDE_DDI_MASK_TGP, 0, + TGP_DDI_HPD_ENABLE_MASK, 0, + hpd_mcc); } static void gen11_hpd_detection_setup(struct drm_i915_private *dev_priv) @@ -3460,9 +3449,13 @@ static void gen11_hpd_irq_setup(struct drm_i915_private *dev_priv) gen11_hpd_detection_setup(dev_priv); if (INTEL_PCH_TYPE(dev_priv) >= PCH_TGP) - tgp_hpd_irq_setup(dev_priv); + icp_hpd_irq_setup(dev_priv, SDE_DDI_MASK_TGP, SDE_TC_MASK_TGP, + TGP_DDI_HPD_ENABLE_MASK, + TGP_TC_HPD_ENABLE_MASK, hpd_tgp); else if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) - icp_hpd_irq_setup(dev_priv); + icp_hpd_irq_setup(dev_priv, SDE_DDI_MASK_ICP, SDE_TC_MASK_ICP, + ICP_DDI_HPD_ENABLE_MASK, + ICP_TC_HPD_ENABLE_MASK, hpd_icp); } static void spt_hpd_detection_setup(struct drm_i915_private *dev_priv) @@ -4340,7 +4333,6 @@ void intel_irq_init(struct drm_i915_private *dev_priv) dev_priv->display.hpd_irq_setup = i915_hpd_irq_setup; } else { if (HAS_PCH_MCC(dev_priv)) - /* EHL doesn't need most of gen11_hpd_irq_setup */ dev_priv->display.hpd_irq_setup = mcc_hpd_irq_setup; else if (INTEL_GEN(dev_priv) >= 11) dev_priv->display.hpd_irq_setup = gen11_hpd_irq_setup; From 3dc007fe9b2b256d2779bb9e9a0d372eb1d90643 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 30 Aug 2019 18:59:58 +0100 Subject: [PATCH 074/331] drm/i915/gtt: Downgrade gen7 (ivb, byt, hsw) back to aliasing-ppgtt With the upcoming change in timing (dramatically reducing the latency between manipulating the ppGTT and execution), no amount of tweaking could save Baytrail, it would always fail to invalidate its TLB. Ville was right, Baytrail is beyond hope. v2: Rollback on all gen7; same timing instability on TLB invalidation. Signed-off-by: Chris Wilson Acked-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20190830180000.24608-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 63 +++++++--------------- drivers/gpu/drm/i915/i915_pci.c | 4 +- 2 files changed, 20 insertions(+), 47 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index 601c16239fdf0..ac55a0d054bdb 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -1741,46 +1741,22 @@ static int remap_l3(struct i915_request *rq) static int switch_context(struct i915_request *rq) { - struct intel_engine_cs *engine = rq->engine; - struct i915_address_space *vm = vm_alias(rq->hw_context); - unsigned int unwind_mm = 0; - u32 hw_flags = 0; + struct intel_context *ce = rq->hw_context; + struct i915_address_space *vm = vm_alias(ce); int ret; GEM_BUG_ON(HAS_EXECLISTS(rq->i915)); if (vm) { - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - int loops; - - /* - * Baytail takes a little more convincing that it really needs - * to reload the PD between contexts. It is not just a little - * longer, as adding more stalls after the load_pd_dir (i.e. - * adding a long loop around flush_pd_dir) is not as effective - * as reloading the PD umpteen times. 32 is derived from - * experimentation (gem_exec_parallel/fds) and has no good - * explanation. - */ - loops = 1; - if (engine->id == BCS0 && IS_VALLEYVIEW(engine->i915)) - loops = 32; - - do { - ret = load_pd_dir(rq, ppgtt); - if (ret) - goto err; - } while (--loops); - - if (ppgtt->pd_dirty_engines & engine->mask) { - unwind_mm = engine->mask; - ppgtt->pd_dirty_engines &= ~unwind_mm; - hw_flags = MI_FORCE_RESTORE; - } + ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm)); + if (ret) + return ret; } - if (rq->hw_context->state) { - GEM_BUG_ON(engine->id != RCS0); + if (ce->state) { + u32 hw_flags; + + GEM_BUG_ON(rq->engine->id != RCS0); /* * The kernel context(s) is treated as pure scratch and is not @@ -1789,22 +1765,25 @@ static int switch_context(struct i915_request *rq) * as nothing actually executes using the kernel context; it * is purely used for flushing user contexts. */ + hw_flags = 0; if (i915_gem_context_is_kernel(rq->gem_context)) hw_flags = MI_RESTORE_INHIBIT; ret = mi_set_context(rq, hw_flags); if (ret) - goto err_mm; + return ret; } if (vm) { + struct intel_engine_cs *engine = rq->engine; + ret = engine->emit_flush(rq, EMIT_INVALIDATE); if (ret) - goto err_mm; + return ret; ret = flush_pd_dir(rq); if (ret) - goto err_mm; + return ret; /* * Not only do we need a full barrier (post-sync write) after @@ -1816,24 +1795,18 @@ static int switch_context(struct i915_request *rq) */ ret = engine->emit_flush(rq, EMIT_INVALIDATE); if (ret) - goto err_mm; + return ret; ret = engine->emit_flush(rq, EMIT_FLUSH); if (ret) - goto err_mm; + return ret; } ret = remap_l3(rq); if (ret) - goto err_mm; + return ret; return 0; - -err_mm: - if (unwind_mm) - i915_vm_to_ppgtt(vm)->pd_dirty_engines |= unwind_mm; -err: - return ret; } static int ring_request_alloc(struct i915_request *request) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 1974e4c78a439..18212c39a9baa 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -420,7 +420,7 @@ static const struct intel_device_info intel_sandybridge_m_gt2_info = { .has_rc6 = 1, \ .has_rc6p = 1, \ .has_rps = true, \ - .ppgtt_type = INTEL_PPGTT_FULL, \ + .ppgtt_type = INTEL_PPGTT_ALIASING, \ .ppgtt_size = 31, \ IVB_PIPE_OFFSETS, \ IVB_CURSOR_OFFSETS, \ @@ -476,7 +476,7 @@ static const struct intel_device_info intel_valleyview_info = { .has_rps = true, .display.has_gmch = 1, .display.has_hotplug = 1, - .ppgtt_type = INTEL_PPGTT_FULL, + .ppgtt_type = INTEL_PPGTT_ALIASING, .ppgtt_size = 31, .has_snoop = true, .has_coherent_ggtt = false, From 0b718ba1e884f64dce27c19311dd2859b87e56b9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 30 Aug 2019 18:59:59 +0100 Subject: [PATCH 075/331] drm/i915/gtt: Downgrade Cherryview back to aliasing-ppgtt With the upcoming change in timing (dramatically reducing the latency between manipulating the ppGTT and execution), no amount of tweaking could save Cherryview, it would always fail to invalidate its TLB. Signed-off-by: Chris Wilson Acked-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20190830180000.24608-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 69 +++++------------------------ drivers/gpu/drm/i915/i915_pci.c | 2 +- 2 files changed, 11 insertions(+), 60 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 7d460b1842ddd..0e28263c7b87c 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1871,60 +1871,6 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq) return 0; } -static int emit_pdps(struct i915_request *rq) -{ - const struct intel_engine_cs * const engine = rq->engine; - struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->hw_context->vm); - int err, i; - u32 *cs; - - GEM_BUG_ON(intel_vgpu_active(rq->i915)); - - /* - * Beware ye of the dragons, this sequence is magic! - * - * Small changes to this sequence can cause anything from - * GPU hangs to forcewake errors and machine lockups! - */ - - /* Flush any residual operations from the context load */ - err = engine->emit_flush(rq, EMIT_FLUSH); - if (err) - return err; - - /* Magic required to prevent forcewake errors! */ - err = engine->emit_flush(rq, EMIT_INVALIDATE); - if (err) - return err; - - cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - /* Ensure the LRI have landed before we invalidate & continue */ - *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED; - for (i = GEN8_3LVL_PDPES; i--; ) { - const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); - u32 base = engine->mmio_base; - - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i)); - *cs++ = upper_32_bits(pd_daddr); - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i)); - *cs++ = lower_32_bits(pd_daddr); - } - *cs++ = MI_NOOP; - - intel_ring_advance(rq, cs); - - /* Be doubly sure the LRI have landed before proceeding */ - err = engine->emit_flush(rq, EMIT_FLUSH); - if (err) - return err; - - /* Re-invalidate the TLB for luck */ - return engine->emit_flush(rq, EMIT_INVALIDATE); -} - static int execlists_request_alloc(struct i915_request *request) { int ret; @@ -1947,10 +1893,7 @@ static int execlists_request_alloc(struct i915_request *request) */ /* Unconditionally invalidate GPU caches and TLBs. */ - if (i915_vm_is_4lvl(request->hw_context->vm)) - ret = request->engine->emit_flush(request, EMIT_INVALIDATE); - else - ret = emit_pdps(request); + ret = request->engine->emit_flush(request, EMIT_INVALIDATE); if (ret) return ret; @@ -3167,12 +3110,20 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine) return indirect_ctx_offset; } +static struct i915_ppgtt *vm_alias(struct i915_address_space *vm) +{ + if (i915_is_ggtt(vm)) + return i915_vm_to_ggtt(vm)->alias; + else + return i915_vm_to_ppgtt(vm); +} + static void execlists_init_reg_state(u32 *regs, struct intel_context *ce, struct intel_engine_cs *engine, struct intel_ring *ring) { - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->vm); + struct i915_ppgtt *ppgtt = vm_alias(ce->vm); bool rcs = engine->class == RENDER_CLASS; u32 base = engine->mmio_base; diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 18212c39a9baa..fbe98a2db88e1 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -570,7 +570,7 @@ static const struct intel_device_info intel_cherryview_info = { .has_rps = true, .has_logical_ring_contexts = 1, .display.has_gmch = 1, - .ppgtt_type = INTEL_PPGTT_FULL, + .ppgtt_type = INTEL_PPGTT_ALIASING, .ppgtt_size = 32, .has_reset_engine = 1, .has_snoop = true, From c1d143dd2ac8d481500fec4c0d715ff301d2766f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 30 Aug 2019 19:00:00 +0100 Subject: [PATCH 076/331] drm/i915: Remove ppgtt->dirty_engines This is no longer used anywhere and so can be removed. However, tracking the dirty status on the ppgtt doesn't work very well if the ppgtt is shared, so perhaps for the best that it is no longer required. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20190830180000.24608-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 16 +--------------- drivers/gpu/drm/i915/i915_gem_gtt.h | 1 - 2 files changed, 1 insertion(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 3c50cea76dc5c..ee51fd1a62074 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -823,17 +823,6 @@ release_pd_entry(struct i915_page_directory * const pd, return free; } -/* - * PDE TLBs are a pain to invalidate on GEN8+. When we modify - * the page table structures, we mark them dirty so that - * context switching/execlist queuing code takes extra steps - * to ensure that tlbs are flushed. - */ -static void mark_tlbs_dirty(struct i915_ppgtt *ppgtt) -{ - ppgtt->pd_dirty_engines = ALL_ENGINES; -} - static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) { struct drm_i915_private *dev_priv = ppgtt->vm.i915; @@ -1735,10 +1724,8 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, } spin_unlock(&pd->lock); - if (flush) { - mark_tlbs_dirty(&ppgtt->base); + if (flush) gen6_ggtt_invalidate(vm->gt->ggtt); - } goto out; @@ -1833,7 +1820,6 @@ static int pd_vma_bind(struct i915_vma *vma, gen6_for_all_pdes(pt, ppgtt->base.pd, pde) gen6_write_pde(ppgtt, pde, pt); - mark_tlbs_dirty(&ppgtt->base); gen6_ggtt_invalidate(ggtt); return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 576f6fb95d135..07c85c134d4c4 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -422,7 +422,6 @@ struct i915_ggtt { struct i915_ppgtt { struct i915_address_space vm; - intel_engine_mask_t pd_dirty_engines; struct i915_page_directory *pd; }; From aabbe344dc3ca5f7d8263a02608ba6179e8a4499 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 30 Aug 2019 19:03:25 +0100 Subject: [PATCH 077/331] drm/i915: Use RCU for unlocked vm_idr lookup Since i915_address_space is now RCU protected, we can do the vm_idr lookup without taking the vm_idr_mutex, just with the rcu_read_lock() instead. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20190830180325.7755-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 1f735ca9b1736..b8969605f4e89 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1057,14 +1057,11 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv, if (upper_32_bits(args->value)) return -ENOENT; - err = mutex_lock_interruptible(&file_priv->vm_idr_lock); - if (err) - return err; - + rcu_read_lock(); vm = idr_find(&file_priv->vm_idr, args->value); - if (vm) - i915_vm_get(vm); - mutex_unlock(&file_priv->vm_idr_lock); + if (vm && !kref_get_unless_zero(&vm->ref)) + vm = NULL; + rcu_read_unlock(); if (!vm) return -ENOENT; From 7bff9779d76917adfa7c67712aa6ae3b7e5098ba Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Fri, 30 Aug 2019 12:16:44 +0200 Subject: [PATCH 078/331] drm/i915: Fix regression with crtc disable ordering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we moved the code to disable crtc's to a separate patch, we forgot to ensure that for_each_oldnew_intel_crtc_in_state_reverse() was moved as well. Signed-off-by: Maarten Lankhorst Fixes: 66d9cec8a6c9 ("drm/i915/display: Move the commit_tail() disable sequence to separate function") Cc: Ville Syrjälä Cc: Manasi Navare Cc: José Roberto de Souza Reviewed-by: Manasi Navare Reviewed-by: José Roberto de Souza Signed-off-by: Manasi Navare Link: https://patchwork.freedesktop.org/patch/msgid/20190830101644.8740-1-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 21 ++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index b38d842ff6ec6..e661e20991184 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -13784,7 +13784,15 @@ static void intel_commit_modeset_disables(struct intel_atomic_state *state) struct intel_crtc *crtc; int i; - for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { + /* + * Disable CRTC/pipes in reverse order because some features(MST in + * TGL+) requires master and slave relationship between pipes, so it + * should always pick the lowest pipe as master as it will be enabled + * first and disable in the reverse order so the master will be the + * last one to be disabled. + */ + for_each_oldnew_intel_crtc_in_state_reverse(state, crtc, old_crtc_state, + new_crtc_state, i) { if (!needs_modeset(new_crtc_state)) continue; @@ -13963,15 +13971,8 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) if (state->modeset) wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_MODESET); - /* - * Disable CRTC/pipes in reverse order because some features(MST in - * TGL+) requires master and slave relationship between pipes, so it - * should always pick the lowest pipe as master as it will be enabled - * first and disable in the reverse order so the master will be the - * last one to be disabled. - */ - for_each_oldnew_intel_crtc_in_state_reverse(state, crtc, old_crtc_state, - new_crtc_state, i) { + for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, + new_crtc_state, i) { if (needs_modeset(new_crtc_state) || new_crtc_state->update_pipe) { From 385ba629aa1cdca5373c1fdbf6693ade5062ad27 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 29 Aug 2019 17:48:28 -0700 Subject: [PATCH 079/331] drm/i915: Allow /2 CD2X divider on gen11+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bspec has just recently been updated with new cdclk values that require the use of a /2 CD2X divider rather than a /1 divider. Once we add the divider selection logic to ICL+ cdclk programming, we have pretty much the same logic we were already using on CNL, so it's simpler to drop icl_set_cdclk() completely and reuse cnl_set_cdclk() on gen11+ platforms as well. v2: - Using ICL_CDCLK_CD2X_PIPE_NONE + BXT_CDCLK_CD2X_PIPE(pipe) for TGL is correct, but looks really confusing. Add some TGL_ macros that alias these to avoid confusion. (Ville) - Use DIV_ROUND_CLOSEST rather than / when applying the divider. (Ville) Cc: José Roberto de Souza Cc: Lucas De Marchi Cc: Ville Syrjälä Signed-off-by: Matt Roper Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190830004828.19359-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/display/intel_cdclk.c | 90 +++++++++------------- drivers/gpu/drm/i915/i915_reg.h | 3 + 2 files changed, 38 insertions(+), 55 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 939088c7d8143..58ba42dcf23f9 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -1659,10 +1659,23 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv, cnl_cdclk_pll_enable(dev_priv, vco); val = divider | skl_cdclk_decimal(cdclk); - if (pipe == INVALID_PIPE) - val |= BXT_CDCLK_CD2X_PIPE_NONE; - else - val |= BXT_CDCLK_CD2X_PIPE(pipe); + + if (INTEL_GEN(dev_priv) >= 12) { + if (pipe == INVALID_PIPE) + val |= TGL_CDCLK_CD2X_PIPE_NONE; + else + val |= TGL_CDCLK_CD2X_PIPE(pipe); + } else if (INTEL_GEN(dev_priv) >= 11) { + if (pipe == INVALID_PIPE) + val |= ICL_CDCLK_CD2X_PIPE_NONE; + else + val |= ICL_CDCLK_CD2X_PIPE(pipe); + } else { + if (pipe == INVALID_PIPE) + val |= BXT_CDCLK_CD2X_PIPE_NONE; + else + val |= BXT_CDCLK_CD2X_PIPE(pipe); + } I915_WRITE(CDCLK_CTL, val); if (pipe != INVALID_PIPE) @@ -1813,51 +1826,6 @@ static int icl_calc_cdclk_pll_vco(struct drm_i915_private *dev_priv, int cdclk) return dev_priv->cdclk.hw.ref * ratio; } -static void icl_set_cdclk(struct drm_i915_private *dev_priv, - const struct intel_cdclk_state *cdclk_state, - enum pipe pipe) -{ - unsigned int cdclk = cdclk_state->cdclk; - unsigned int vco = cdclk_state->vco; - int ret; - - ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, - SKL_CDCLK_PREPARE_FOR_CHANGE, - SKL_CDCLK_READY_FOR_CHANGE, - SKL_CDCLK_READY_FOR_CHANGE, 3); - if (ret) { - DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", - ret); - return; - } - - if (dev_priv->cdclk.hw.vco != 0 && - dev_priv->cdclk.hw.vco != vco) - cnl_cdclk_pll_disable(dev_priv); - - if (dev_priv->cdclk.hw.vco != vco) - cnl_cdclk_pll_enable(dev_priv, vco); - - /* - * On ICL CD2X_DIV can only be 1, so we'll never end up changing the - * divider here synchronized to a pipe while CDCLK is on, nor will we - * need the corresponding vblank wait. - */ - I915_WRITE(CDCLK_CTL, ICL_CDCLK_CD2X_PIPE_NONE | - skl_cdclk_decimal(cdclk)); - - sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, - cdclk_state->voltage_level); - - intel_update_cdclk(dev_priv); - - /* - * Can't read out the voltage level :( - * Let's just assume everything is as expected. - */ - dev_priv->cdclk.hw.voltage_level = cdclk_state->voltage_level; -} - static u8 icl_calc_voltage_level(struct drm_i915_private *dev_priv, int cdclk) { if (IS_ELKHARTLAKE(dev_priv)) { @@ -1881,6 +1849,7 @@ static void icl_get_cdclk(struct drm_i915_private *dev_priv, struct intel_cdclk_state *cdclk_state) { u32 val; + int div; cdclk_state->bypass = 50000; @@ -1914,10 +1883,21 @@ static void icl_get_cdclk(struct drm_i915_private *dev_priv, cdclk_state->vco = (val & BXT_DE_PLL_RATIO_MASK) * cdclk_state->ref; - val = I915_READ(CDCLK_CTL); - WARN_ON((val & BXT_CDCLK_CD2X_DIV_SEL_MASK) != 0); + val = I915_READ(CDCLK_CTL) & BXT_CDCLK_CD2X_DIV_SEL_MASK; + switch (val) { + case BXT_CDCLK_CD2X_DIV_SEL_1: + div = 2; + break; + case BXT_CDCLK_CD2X_DIV_SEL_2: + div = 4; + break; + default: + MISSING_CASE(val); + div = 2; + break; + } - cdclk_state->cdclk = cdclk_state->vco / 2; + cdclk_state->cdclk = DIV_ROUND_CLOSEST(cdclk_state->vco, div); out: /* @@ -1963,7 +1943,7 @@ static void icl_init_cdclk(struct drm_i915_private *dev_priv) icl_calc_voltage_level(dev_priv, sanitized_state.cdclk); - icl_set_cdclk(dev_priv, &sanitized_state, INVALID_PIPE); + cnl_set_cdclk(dev_priv, &sanitized_state, INVALID_PIPE); } static void icl_uninit_cdclk(struct drm_i915_private *dev_priv) @@ -1975,7 +1955,7 @@ static void icl_uninit_cdclk(struct drm_i915_private *dev_priv) cdclk_state.voltage_level = icl_calc_voltage_level(dev_priv, cdclk_state.cdclk); - icl_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); + cnl_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); } static void cnl_init_cdclk(struct drm_i915_private *dev_priv) @@ -2810,7 +2790,7 @@ void intel_update_rawclk(struct drm_i915_private *dev_priv) void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) { if (INTEL_GEN(dev_priv) >= 11) { - dev_priv->display.set_cdclk = icl_set_cdclk; + dev_priv->display.set_cdclk = cnl_set_cdclk; dev_priv->display.modeset_calc_cdclk = icl_modeset_calc_cdclk; } else if (IS_CANNONLAKE(dev_priv)) { dev_priv->display.set_cdclk = cnl_set_cdclk; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 53735433fe203..6c43b8c583bbb 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -9759,7 +9759,10 @@ enum skl_power_gate { #define BXT_CDCLK_CD2X_PIPE(pipe) ((pipe) << 20) #define CDCLK_DIVMUX_CD_OVERRIDE (1 << 19) #define BXT_CDCLK_CD2X_PIPE_NONE BXT_CDCLK_CD2X_PIPE(3) +#define ICL_CDCLK_CD2X_PIPE(pipe) (_PICK(pipe, 0, 2, 6) << 19) #define ICL_CDCLK_CD2X_PIPE_NONE (7 << 19) +#define TGL_CDCLK_CD2X_PIPE(pipe) BXT_CDCLK_CD2X_PIPE(pipe) +#define TGL_CDCLK_CD2X_PIPE_NONE ICL_CDCLK_CD2X_PIPE_NONE #define BXT_CDCLK_SSA_PRECHARGE_ENABLE (1 << 16) #define CDCLK_FREQ_DECIMAL_MASK (0x7ff) From 3d1da92baffe69cf847699ceccf4297356da58fa Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 26 Aug 2019 15:55:40 -0700 Subject: [PATCH 080/331] drm/i915: Add 324mhz and 326.4mhz cdclks for gen11+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bspec was recently updated with these new cdclk values for ICL, EHL, and TGL. Bspec: 20598 Bspec: 49201 Cc: José Roberto de Souza Cc: Lucas De Marchi Signed-off-by: Matt Roper Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20190826225540.11987-3-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/display/intel_cdclk.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 58ba42dcf23f9..76f11d465e91e 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -1761,8 +1761,10 @@ static void cnl_sanitize_cdclk(struct drm_i915_private *dev_priv) static int icl_calc_cdclk(int min_cdclk, unsigned int ref) { - static const int ranges_24[] = { 180000, 192000, 312000, 552000, 648000 }; - static const int ranges_19_38[] = { 172800, 192000, 307200, 556800, 652800 }; + static const int ranges_24[] = { 180000, 192000, 312000, 324000, + 552000, 648000 }; + static const int ranges_19_38[] = { 172800, 192000, 307200, 326400, + 556800, 652800 }; const int *ranges; int len, i; @@ -1803,6 +1805,7 @@ static int icl_calc_cdclk_pll_vco(struct drm_i915_private *dev_priv, int cdclk) /* fall through */ case 172800: case 307200: + case 326400: case 556800: case 652800: WARN_ON(dev_priv->cdclk.hw.ref != 19200 && @@ -1810,6 +1813,7 @@ static int icl_calc_cdclk_pll_vco(struct drm_i915_private *dev_priv, int cdclk) break; case 180000: case 312000: + case 324000: case 552000: case 648000: WARN_ON(dev_priv->cdclk.hw.ref != 24000); From dffa8feb308455f9b3ce0eeb55a4eac3afc0786b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 30 Aug 2019 19:19:29 +0100 Subject: [PATCH 081/331] drm/i915/perf: Assert locking for i915_init_oa_perf_state() We use the context->pin_mutex to serialise updates to the OA config and the registers values written into each new context. Document this relationship and assert we do hold the context->pin_mutex as used by gen8_configure_all_contexts() to serialise updates to the OA config itself. v2: Add a white-lie for when we call intel_gt_resume() from init. v3: Lie while we have the context pinned inside atomic reset. Signed-off-by: Chris Wilson Cc: Lionel Landwerlin Reviewed-by: Lionel Landwerlin #v1 Link: https://patchwork.freedesktop.org/patch/msgid/20190830181929.18663-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 7 ++++++- drivers/gpu/drm/i915/gt/intel_lrc.c | 10 ++++++++++ drivers/gpu/drm/i915/i915_perf.c | 3 +++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 1363e069ec830..aa6cf0152ce74 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -6,6 +6,7 @@ #include "i915_drv.h" #include "i915_params.h" +#include "intel_context.h" #include "intel_engine_pm.h" #include "intel_gt.h" #include "intel_gt_pm.h" @@ -142,8 +143,12 @@ int intel_gt_resume(struct intel_gt *gt) intel_engine_pm_get(engine); ce = engine->kernel_context; - if (ce) + if (ce) { + GEM_BUG_ON(!intel_context_is_pinned(ce)); + mutex_acquire(&ce->pin_mutex.dep_map, 0, 0, _THIS_IP_); ce->ops->reset(ce); + mutex_release(&ce->pin_mutex.dep_map, 0, _THIS_IP_); + } engine->serial++; /* kernel context lost */ err = engine->resume(engine); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 0e28263c7b87c..87b7473a6dfb1 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -2383,6 +2383,11 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) ce = rq->hw_context; GEM_BUG_ON(i915_active_is_idle(&ce->active)); GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); + + /* Proclaim we have exclusive access to the context image! */ + GEM_BUG_ON(!intel_context_is_pinned(ce)); + mutex_acquire(&ce->pin_mutex.dep_map, 2, 0, _THIS_IP_); + rq = active_request(rq); if (!rq) { ce->ring->head = ce->ring->tail; @@ -2442,6 +2447,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) engine->name, ce->ring->head, ce->ring->tail); intel_ring_update_space(ce->ring); __execlists_update_reg_state(ce, engine); + mutex_release(&ce->pin_mutex.dep_map, 0, _THIS_IP_); unwind: /* Push back any incomplete requests for replay after the reset. */ @@ -3920,6 +3926,9 @@ void intel_lr_context_reset(struct intel_engine_cs *engine, u32 head, bool scrub) { + GEM_BUG_ON(!intel_context_is_pinned(ce)); + mutex_acquire(&ce->pin_mutex.dep_map, 2, 0, _THIS_IP_); + /* * We want a simple context + ring to execute the breadcrumb update. * We cannot rely on the context being intact across the GPU hang, @@ -3944,6 +3953,7 @@ void intel_lr_context_reset(struct intel_engine_cs *engine, intel_ring_update_space(ce->ring); __execlists_update_reg_state(ce, engine); + mutex_release(&ce->pin_mutex.dep_map, 0, _THIS_IP_); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 2c9f46e126223..c1b7642337615 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -2305,6 +2305,9 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine, { struct i915_perf_stream *stream; + /* perf.exclusive_stream serialised by gen8_configure_all_contexts() */ + lockdep_assert_held(&ce->pin_mutex); + if (engine->class != RENDER_CLASS) return; From 75427b2a2bffc083d51dec389c235722a9c69b05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 28 Aug 2019 13:20:59 +0300 Subject: [PATCH 082/331] drm/i915: Limit MST to <= 8bpc once again MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit My attempt at allowing MST to use the higher color depths has regressed some configurations. Apparently people have setups where all MST streams will fit into the DP link with 8bpc but won't fit with higher color depths. What we really should be doing is reducing the bpc for all the streams on the same link until they start to fit. But that requires a bit more work, so in the meantime let's revert back closer to the old behavior and limit MST to at most 8bpc. Cc: stable@vger.kernel.org Cc: Lyude Paul Tested-by: Geoffrey Bennett Fixes: f1477219869c ("drm/i915: Remove the 8bpc shackles from DP MST") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111505 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190828102059.2512-1-ville.syrjala@linux.intel.com Reviewed-by: Lyude Paul --- drivers/gpu/drm/i915/display/intel_dp_mst.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 2c5ac3dd647fd..6df240a01b8c3 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -128,7 +128,15 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder, limits.max_lane_count = intel_dp_max_lane_count(intel_dp); limits.min_bpp = intel_dp_min_bpp(pipe_config); - limits.max_bpp = pipe_config->pipe_bpp; + /* + * FIXME: If all the streams can't fit into the link with + * their current pipe_bpp we should reduce pipe_bpp across + * the board until things start to fit. Until then we + * limit to <= 8bpc since that's what was hardcoded for all + * MST streams previously. This hack should be removed once + * we have the proper retry logic in place. + */ + limits.max_bpp = min(pipe_config->pipe_bpp, 24); intel_dp_adjust_compliance_config(intel_dp, pipe_config, &limits); From 66a990dd0c49b534b2396934c1659b3fef34233d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 30 Aug 2019 21:27:19 +0300 Subject: [PATCH 083/331] drm/i915: Prefer encoder->name over port_name() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit enum port is a mess now because it no longer matches the spec at all. Let's start to dig ourselves out of this hole by reducing our reliance on port_name(). This should at least make a bunch of debug messages a bit more sensible while we think how to fill the the hole properly. Based on the following cocci script with a lot of manual cleanup (all the format strings etc.): @@ expression E; @@ ( - port_name(E->port) + E->base.base.id, E->base.name | - port_name(E.port) + E.base.base.id, E.base.name ) @@ enum port P; expression E; @@ P = E->port <... - port_name(P) + E->base.base.id, E->base.name ...> @@ enum port P; expression E; @@ P = E.port <... - port_name(P) + E.base.base.id, E.base.name ...> @@ expression E; @@ { - enum port P = E; ... when != P } Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190830182719.32608-1-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/display/intel_audio.c | 10 +- drivers/gpu/drm/i915/display/intel_ddi.c | 19 +-- drivers/gpu/drm/i915/display/intel_display.c | 4 +- drivers/gpu/drm/i915/display/intel_dp.c | 122 ++++++++++-------- drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 4 +- drivers/gpu/drm/i915/display/intel_hdmi.c | 9 +- drivers/gpu/drm/i915/display/intel_hotplug.c | 3 +- drivers/gpu/drm/i915/i915_debugfs.c | 5 +- 8 files changed, 102 insertions(+), 74 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c index ddcccf4408c37..aac089c79ceb6 100644 --- a/drivers/gpu/drm/i915/display/intel_audio.c +++ b/drivers/gpu/drm/i915/display/intel_audio.c @@ -560,8 +560,9 @@ static void ilk_audio_codec_disable(struct intel_encoder *encoder, u32 tmp, eldv; i915_reg_t aud_config, aud_cntrl_st2; - DRM_DEBUG_KMS("Disable audio codec on port %c, pipe %c\n", - port_name(port), pipe_name(pipe)); + DRM_DEBUG_KMS("Disable audio codec on [ENCODER:%d:%s], pipe %c\n", + encoder->base.base.id, encoder->base.name, + pipe_name(pipe)); if (WARN_ON(port == PORT_A)) return; @@ -609,8 +610,9 @@ static void ilk_audio_codec_enable(struct intel_encoder *encoder, int len, i; i915_reg_t hdmiw_hdmiedid, aud_config, aud_cntl_st, aud_cntrl_st2; - DRM_DEBUG_KMS("Enable audio codec on port %c, pipe %c, %u bytes ELD\n", - port_name(port), pipe_name(pipe), drm_eld_size(eld)); + DRM_DEBUG_KMS("Enable audio codec on [ENCODER:%d:%s], pipe %c, %u bytes ELD\n", + encoder->base.base.id, encoder->base.name, + pipe_name(pipe), drm_eld_size(eld)); if (WARN_ON(port == PORT_A)) return; diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 3180dacb5be49..1fe0bf01e580e 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -2080,18 +2080,20 @@ static void intel_ddi_get_encoder_pipes(struct intel_encoder *encoder, } if (!*pipe_mask) - DRM_DEBUG_KMS("No pipe for ddi port %c found\n", - port_name(port)); + DRM_DEBUG_KMS("No pipe for [ENCODER:%d:%s] found\n", + encoder->base.base.id, encoder->base.name); if (!mst_pipe_mask && hweight8(*pipe_mask) > 1) { - DRM_DEBUG_KMS("Multiple pipes for non DP-MST port %c (pipe_mask %02x)\n", - port_name(port), *pipe_mask); + DRM_DEBUG_KMS("Multiple pipes for [ENCODER:%d:%s] (pipe_mask %02x)\n", + encoder->base.base.id, encoder->base.name, + *pipe_mask); *pipe_mask = BIT(ffs(*pipe_mask) - 1); } if (mst_pipe_mask && mst_pipe_mask != *pipe_mask) - DRM_DEBUG_KMS("Conflicting MST and non-MST encoders for port %c (pipe_mask %02x mst_pipe_mask %02x)\n", - port_name(port), *pipe_mask, mst_pipe_mask); + DRM_DEBUG_KMS("Conflicting MST and non-MST state for [ENCODER:%d:%s] (pipe_mask %02x mst_pipe_mask %02x)\n", + encoder->base.base.id, encoder->base.name, + *pipe_mask, mst_pipe_mask); else *is_dp_mst = mst_pipe_mask; @@ -2101,8 +2103,9 @@ static void intel_ddi_get_encoder_pipes(struct intel_encoder *encoder, if ((tmp & (BXT_PHY_CMNLANE_POWERDOWN_ACK | BXT_PHY_LANE_POWERDOWN_ACK | BXT_PHY_LANE_ENABLED)) != BXT_PHY_LANE_ENABLED) - DRM_ERROR("Port %c enabled but PHY powered down? " - "(PHY_CTL %08x)\n", port_name(port), tmp); + DRM_ERROR("[ENCODER:%d:%s] enabled but PHY powered down? " + "(PHY_CTL %08x)\n", encoder->base.base.id, + encoder->base.name, tmp); } intel_display_power_put(dev_priv, encoder->power_domain, wakeref); diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index e661e20991184..a03dc6eb61f84 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -1612,8 +1612,8 @@ void vlv_wait_port_ready(struct drm_i915_private *dev_priv, if (intel_de_wait_for_register(dev_priv, dpll_reg, port_mask, expected_mask, 1000)) - WARN(1, "timed out waiting for port %c ready: got 0x%x, expected 0x%x\n", - port_name(dport->base.port), + WARN(1, "timed out waiting for [ENCODER:%d:%s] port ready: got 0x%x, expected 0x%x\n", + dport->base.base.base.id, dport->base.base.name, I915_READ(dpll_reg) & port_mask, expected_mask); } diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index aaa90992a7f0e..4e3e696e7fc84 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -639,12 +639,14 @@ vlv_power_sequencer_kick(struct intel_dp *intel_dp) u32 DP; if (WARN(I915_READ(intel_dp->output_reg) & DP_PORT_EN, - "skipping pipe %c power sequencer kick due to port %c being active\n", - pipe_name(pipe), port_name(intel_dig_port->base.port))) + "skipping pipe %c power sequencer kick due to [ENCODER:%d:%s] being active\n", + pipe_name(pipe), intel_dig_port->base.base.base.id, + intel_dig_port->base.base.name)) return; - DRM_DEBUG_KMS("kicking pipe %c power sequencer for port %c\n", - pipe_name(pipe), port_name(intel_dig_port->base.port)); + DRM_DEBUG_KMS("kicking pipe %c power sequencer for [ENCODER:%d:%s]\n", + pipe_name(pipe), intel_dig_port->base.base.base.id, + intel_dig_port->base.base.name); /* Preserve the BIOS-computed detected bit. This is * supposed to be read-only. @@ -762,9 +764,10 @@ vlv_power_sequencer_pipe(struct intel_dp *intel_dp) vlv_steal_power_sequencer(dev_priv, pipe); intel_dp->pps_pipe = pipe; - DRM_DEBUG_KMS("picked pipe %c power sequencer for port %c\n", + DRM_DEBUG_KMS("picked pipe %c power sequencer for [ENCODER:%d:%s]\n", pipe_name(intel_dp->pps_pipe), - port_name(intel_dig_port->base.port)); + intel_dig_port->base.base.base.id, + intel_dig_port->base.base.name); /* init power sequencer on this pipe and port */ intel_dp_init_panel_power_sequencer(intel_dp); @@ -872,13 +875,16 @@ vlv_initial_power_sequencer_setup(struct intel_dp *intel_dp) /* didn't find one? just let vlv_power_sequencer_pipe() pick one when needed */ if (intel_dp->pps_pipe == INVALID_PIPE) { - DRM_DEBUG_KMS("no initial power sequencer for port %c\n", - port_name(port)); + DRM_DEBUG_KMS("no initial power sequencer for [ENCODER:%d:%s]\n", + intel_dig_port->base.base.base.id, + intel_dig_port->base.base.name); return; } - DRM_DEBUG_KMS("initial power sequencer for port %c: pipe %c\n", - port_name(port), pipe_name(intel_dp->pps_pipe)); + DRM_DEBUG_KMS("initial power sequencer for [ENCODER:%d:%s]: pipe %c\n", + intel_dig_port->base.base.base.id, + intel_dig_port->base.base.name, + pipe_name(intel_dp->pps_pipe)); intel_dp_init_panel_power_sequencer(intel_dp); intel_dp_init_panel_power_sequencer_registers(intel_dp, false); @@ -2492,8 +2498,9 @@ static bool edp_panel_vdd_on(struct intel_dp *intel_dp) intel_display_power_get(dev_priv, intel_aux_power_domain(intel_dig_port)); - DRM_DEBUG_KMS("Turning eDP port %c VDD on\n", - port_name(intel_dig_port->base.port)); + DRM_DEBUG_KMS("Turning [ENCODER:%d:%s] VDD on\n", + intel_dig_port->base.base.base.id, + intel_dig_port->base.base.name); if (!edp_have_panel_power(intel_dp)) wait_panel_power_cycle(intel_dp); @@ -2512,8 +2519,9 @@ static bool edp_panel_vdd_on(struct intel_dp *intel_dp) * If the panel wasn't on, delay before accessing aux channel */ if (!edp_have_panel_power(intel_dp)) { - DRM_DEBUG_KMS("eDP port %c panel power wasn't enabled\n", - port_name(intel_dig_port->base.port)); + DRM_DEBUG_KMS("[ENCODER:%d:%s] panel power wasn't enabled\n", + intel_dig_port->base.base.base.id, + intel_dig_port->base.base.name); msleep(intel_dp->panel_power_up_delay); } @@ -2538,8 +2546,9 @@ void intel_edp_panel_vdd_on(struct intel_dp *intel_dp) vdd = false; with_pps_lock(intel_dp, wakeref) vdd = edp_panel_vdd_on(intel_dp); - I915_STATE_WARN(!vdd, "eDP port %c VDD already requested on\n", - port_name(dp_to_dig_port(intel_dp)->base.port)); + I915_STATE_WARN(!vdd, "[ENCODER:%d:%s] VDD already requested on\n", + dp_to_dig_port(intel_dp)->base.base.base.id, + dp_to_dig_port(intel_dp)->base.base.name); } static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp) @@ -2557,8 +2566,9 @@ static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp) if (!edp_have_panel_vdd(intel_dp)) return; - DRM_DEBUG_KMS("Turning eDP port %c VDD off\n", - port_name(intel_dig_port->base.port)); + DRM_DEBUG_KMS("Turning [ENCODER:%d:%s] VDD off\n", + intel_dig_port->base.base.base.id, + intel_dig_port->base.base.name); pp = ironlake_get_pp_control(intel_dp); pp &= ~EDP_FORCE_VDD; @@ -2620,8 +2630,9 @@ static void edp_panel_vdd_off(struct intel_dp *intel_dp, bool sync) if (!intel_dp_is_edp(intel_dp)) return; - I915_STATE_WARN(!intel_dp->want_panel_vdd, "eDP port %c VDD not forced on", - port_name(dp_to_dig_port(intel_dp)->base.port)); + I915_STATE_WARN(!intel_dp->want_panel_vdd, "[ENCODER:%d:%s] VDD not forced on", + dp_to_dig_port(intel_dp)->base.base.base.id, + dp_to_dig_port(intel_dp)->base.base.name); intel_dp->want_panel_vdd = false; @@ -2642,12 +2653,14 @@ static void edp_panel_on(struct intel_dp *intel_dp) if (!intel_dp_is_edp(intel_dp)) return; - DRM_DEBUG_KMS("Turn eDP port %c panel power on\n", - port_name(dp_to_dig_port(intel_dp)->base.port)); + DRM_DEBUG_KMS("Turn [ENCODER:%d:%s] panel power on\n", + dp_to_dig_port(intel_dp)->base.base.base.id, + dp_to_dig_port(intel_dp)->base.base.name); if (WARN(edp_have_panel_power(intel_dp), - "eDP port %c panel power already on\n", - port_name(dp_to_dig_port(intel_dp)->base.port))) + "[ENCODER:%d:%s] panel power already on\n", + dp_to_dig_port(intel_dp)->base.base.base.id, + dp_to_dig_port(intel_dp)->base.base.name)) return; wait_panel_power_cycle(intel_dp); @@ -2702,11 +2715,11 @@ static void edp_panel_off(struct intel_dp *intel_dp) if (!intel_dp_is_edp(intel_dp)) return; - DRM_DEBUG_KMS("Turn eDP port %c panel power off\n", - port_name(dig_port->base.port)); + DRM_DEBUG_KMS("Turn [ENCODER:%d:%s] panel power off\n", + dig_port->base.base.base.id, dig_port->base.base.name); - WARN(!intel_dp->want_panel_vdd, "Need eDP port %c VDD to turn off panel\n", - port_name(dig_port->base.port)); + WARN(!intel_dp->want_panel_vdd, "Need [ENCODER:%d:%s] VDD to turn off panel\n", + dig_port->base.base.base.id, dig_port->base.base.name); pp = ironlake_get_pp_control(intel_dp); /* We need to switch off panel power _and_ force vdd, for otherwise some @@ -2851,8 +2864,8 @@ static void assert_dp_port(struct intel_dp *intel_dp, bool state) bool cur_state = I915_READ(intel_dp->output_reg) & DP_PORT_EN; I915_STATE_WARN(cur_state != state, - "DP port %c state assertion failure (expected %s, current %s)\n", - port_name(dig_port->base.port), + "[ENCODER:%d:%s] state assertion failure (expected %s, current %s)\n", + dig_port->base.base.base.id, dig_port->base.base.name, onoff(state), onoff(cur_state)); } #define assert_dp_port_disabled(d) assert_dp_port((d), false) @@ -3430,8 +3443,9 @@ static void vlv_detach_power_sequencer(struct intel_dp *intel_dp) * port select always when logically disconnecting a power sequencer * from a port. */ - DRM_DEBUG_KMS("detaching pipe %c power sequencer from port %c\n", - pipe_name(pipe), port_name(intel_dig_port->base.port)); + DRM_DEBUG_KMS("detaching pipe %c power sequencer from [ENCODER:%d:%s]\n", + pipe_name(pipe), intel_dig_port->base.base.base.id, + intel_dig_port->base.base.name); I915_WRITE(pp_on_reg, 0); POSTING_READ(pp_on_reg); @@ -3447,17 +3461,18 @@ static void vlv_steal_power_sequencer(struct drm_i915_private *dev_priv, for_each_intel_dp(&dev_priv->drm, encoder) { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); - enum port port = encoder->port; WARN(intel_dp->active_pipe == pipe, - "stealing pipe %c power sequencer from active (e)DP port %c\n", - pipe_name(pipe), port_name(port)); + "stealing pipe %c power sequencer from active [ENCODER:%d:%s]\n", + pipe_name(pipe), encoder->base.base.id, + encoder->base.name); if (intel_dp->pps_pipe != pipe) continue; - DRM_DEBUG_KMS("stealing pipe %c power sequencer from port %c\n", - pipe_name(pipe), port_name(port)); + DRM_DEBUG_KMS("stealing pipe %c power sequencer from [ENCODER:%d:%s]\n", + pipe_name(pipe), encoder->base.base.id, + encoder->base.name); /* make sure vdd is off before we steal it */ vlv_detach_power_sequencer(intel_dp); @@ -3499,8 +3514,9 @@ static void vlv_init_panel_power_sequencer(struct intel_encoder *encoder, /* now it's all ours */ intel_dp->pps_pipe = crtc->pipe; - DRM_DEBUG_KMS("initializing pipe %c power sequencer for port %c\n", - pipe_name(intel_dp->pps_pipe), port_name(encoder->port)); + DRM_DEBUG_KMS("initializing pipe %c power sequencer for [ENCODER:%d:%s]\n", + pipe_name(intel_dp->pps_pipe), encoder->base.base.id, + encoder->base.name); /* init power sequencer on this pipe and port */ intel_dp_init_panel_power_sequencer(intel_dp); @@ -4321,9 +4337,10 @@ intel_dp_configure_mst(struct intel_dp *intel_dp) &dp_to_dig_port(intel_dp)->base; bool sink_can_mst = intel_dp_sink_can_mst(intel_dp); - DRM_DEBUG_KMS("MST support? port %c: %s, sink: %s, modparam: %s\n", - port_name(encoder->port), yesno(intel_dp->can_mst), - yesno(sink_can_mst), yesno(i915_modparams.enable_dp_mst)); + DRM_DEBUG_KMS("[ENCODER:%d:%s] MST support? port: %s, sink: %s, modparam: %s\n", + encoder->base.base.id, encoder->base.name, + yesno(intel_dp->can_mst), yesno(sink_can_mst), + yesno(i915_modparams.enable_dp_mst)); if (!intel_dp->can_mst) return; @@ -6284,13 +6301,15 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) * would end up in an endless cycle of * "vdd off -> long hpd -> vdd on -> detect -> vdd off -> ..." */ - DRM_DEBUG_KMS("ignoring long hpd on eDP port %c\n", - port_name(intel_dig_port->base.port)); + DRM_DEBUG_KMS("ignoring long hpd on eDP [ENCODER:%d:%s]\n", + intel_dig_port->base.base.base.id, + intel_dig_port->base.base.name); return IRQ_HANDLED; } - DRM_DEBUG_KMS("got hpd irq on port %c - %s\n", - port_name(intel_dig_port->base.port), + DRM_DEBUG_KMS("got hpd irq on [ENCODER:%d:%s] - %s\n", + intel_dig_port->base.base.base.id, + intel_dig_port->base.base.name, long_hpd ? "long" : "short"); if (long_hpd) { @@ -7154,8 +7173,9 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, intel_dp_modeset_retry_work_fn); if (WARN(intel_dig_port->max_lanes < 1, - "Not enough lanes (%d) for DP on port %c\n", - intel_dig_port->max_lanes, port_name(port))) + "Not enough lanes (%d) for DP on [ENCODER:%d:%s]\n", + intel_dig_port->max_lanes, intel_encoder->base.base.id, + intel_encoder->base.name)) return false; intel_dp_set_source_rates(intel_dp); @@ -7196,9 +7216,9 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, port != PORT_B && port != PORT_C)) return false; - DRM_DEBUG_KMS("Adding %s connector on port %c\n", - type == DRM_MODE_CONNECTOR_eDP ? "eDP" : "DP", - port_name(port)); + DRM_DEBUG_KMS("Adding %s connector on [ENCODER:%d:%s]\n", + type == DRM_MODE_CONNECTOR_eDP ? "eDP" : "DP", + intel_encoder->base.base.id, intel_encoder->base.name); drm_connector_init(dev, connector, &intel_dp_connector_funcs, type); drm_connector_helper_add(connector, &intel_dp_connector_helper_funcs); diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index b8148f838354c..98288edf88f0e 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -2910,8 +2910,8 @@ static bool icl_get_combo_phy_dpll(struct intel_atomic_state *state, has_dpll4 ? DPLL_ID_EHL_DPLL4 : DPLL_ID_ICL_DPLL1); if (!port_dpll->pll) { - DRM_DEBUG_KMS("No combo PHY PLL found for port %c\n", - port_name(encoder->port)); + DRM_DEBUG_KMS("No combo PHY PLL found for [ENCODER:%d:%s]\n", + encoder->base.base.id, encoder->base.name); return false; } diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index f6bb77fd09303..d1e26a00c6426 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -3075,12 +3075,13 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port, struct drm_i915_private *dev_priv = to_i915(dev); enum port port = intel_encoder->port; - DRM_DEBUG_KMS("Adding HDMI connector on port %c\n", - port_name(port)); + DRM_DEBUG_KMS("Adding HDMI connector on [ENCODER:%d:%s]\n", + intel_encoder->base.base.id, intel_encoder->base.name); if (WARN(intel_dig_port->max_lanes < 4, - "Not enough lanes (%d) for HDMI on port %c\n", - intel_dig_port->max_lanes, port_name(port))) + "Not enough lanes (%d) for HDMI on [ENCODER:%d:%s]\n", + intel_dig_port->max_lanes, intel_encoder->base.base.id, + intel_encoder->base.name)) return; drm_connector_init(dev, connector, &intel_hdmi_connector_funcs, diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.c b/drivers/gpu/drm/i915/display/intel_hotplug.c index 56be20f6f47e3..fc29046d48ead 100644 --- a/drivers/gpu/drm/i915/display/intel_hotplug.c +++ b/drivers/gpu/drm/i915/display/intel_hotplug.c @@ -481,7 +481,8 @@ void intel_hpd_irq_handler(struct drm_i915_private *dev_priv, long_hpd = long_mask & BIT(pin); - DRM_DEBUG_DRIVER("digital hpd port %c - %s\n", port_name(port), + DRM_DEBUG_DRIVER("digital hpd on [ENCODER:%d:%s] - %s\n", + encoder->base.base.id, encoder->base.name, long_hpd ? "long" : "short"); queue_dig = true; diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 5e81c4fc13aed..9749369173296 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3114,8 +3114,9 @@ static int i915_dp_mst_info(struct seq_file *m, void *unused) if (!intel_dig_port->dp.can_mst) continue; - seq_printf(m, "MST Source Port %c\n", - port_name(intel_dig_port->base.port)); + seq_printf(m, "MST Source Port [ENCODER:%d:%s]\n", + intel_dig_port->base.base.base.id, + intel_dig_port->base.base.name); drm_dp_mst_dump_topology(m, &intel_dig_port->dp.mst_mgr); } drm_connector_list_iter_end(&conn_iter); From 9e362992ff34cab695e669ef1e3f4f65463087e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 28 Aug 2019 21:34:24 +0300 Subject: [PATCH 084/331] drm/i915: Clean up HDMI deep color handling a bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reogranize the HDMI deep color state computation to just loop over possible bpc values. Avoids having to maintain so many variants of the clock etc. The current code also looks confused w.r.t. port_clock vs. bw_constrained. It would happily update port_clock for deep color but then not actually enable deep color due to bw_constrained being set. The new logic handles that case correctly. v2: Pull stuff into separate funcs (Jani) Reviewed-by: Jani Nikula Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190828183424.7856-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_hdmi.c | 134 +++++++++++++--------- 1 file changed, 77 insertions(+), 57 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index d1e26a00c6426..640254feef27d 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -2265,9 +2265,7 @@ static bool hdmi_deep_color_possible(const struct intel_crtc_state *crtc_state, static bool intel_hdmi_ycbcr420_config(struct drm_connector *connector, - struct intel_crtc_state *config, - int *clock_12bpc, int *clock_10bpc, - int *clock_8bpc) + struct intel_crtc_state *config) { struct intel_crtc *intel_crtc = to_intel_crtc(config->base.crtc); @@ -2276,11 +2274,6 @@ intel_hdmi_ycbcr420_config(struct drm_connector *connector, return false; } - /* YCBCR420 TMDS rate requirement is half the pixel clock */ - config->port_clock /= 2; - *clock_12bpc /= 2; - *clock_10bpc /= 2; - *clock_8bpc /= 2; config->output_format = INTEL_OUTPUT_FORMAT_YCBCR420; /* YCBCR 420 output conversion needs a scaler */ @@ -2295,6 +2288,76 @@ intel_hdmi_ycbcr420_config(struct drm_connector *connector, return true; } +static int intel_hdmi_port_clock(int clock, int bpc) +{ + /* + * Need to adjust the port link by: + * 1.5x for 12bpc + * 1.25x for 10bpc + */ + return clock * bpc / 8; +} + +static int intel_hdmi_compute_bpc(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state, + int clock, bool force_dvi) +{ + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); + int bpc; + + for (bpc = 12; bpc >= 10; bpc -= 2) { + if (hdmi_deep_color_possible(crtc_state, bpc) && + hdmi_port_clock_valid(intel_hdmi, + intel_hdmi_port_clock(clock, bpc), + true, force_dvi) == MODE_OK) + return bpc; + } + + return 8; +} + +static int intel_hdmi_compute_clock(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state, + bool force_dvi) +{ + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); + const struct drm_display_mode *adjusted_mode = + &crtc_state->base.adjusted_mode; + int bpc, clock = adjusted_mode->crtc_clock; + + if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) + clock *= 2; + + /* YCBCR420 TMDS rate requirement is half the pixel clock */ + if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420) + clock /= 2; + + bpc = intel_hdmi_compute_bpc(encoder, crtc_state, + clock, force_dvi); + + crtc_state->port_clock = intel_hdmi_port_clock(clock, bpc); + + /* + * pipe_bpp could already be below 8bpc due to + * FDI bandwidth constraints. We shouldn't bump it + * back up to 8bpc in that case. + */ + if (crtc_state->pipe_bpp > bpc * 3) + crtc_state->pipe_bpp = bpc * 3; + + DRM_DEBUG_KMS("picking %d bpc for HDMI output (pipe bpp: %d)\n", + bpc, crtc_state->pipe_bpp); + + if (hdmi_port_clock_valid(intel_hdmi, crtc_state->port_clock, + false, force_dvi) != MODE_OK) { + DRM_DEBUG_KMS("unsupported HDMI clock (%d kHz), rejecting mode\n", + crtc_state->port_clock); + return -EINVAL; + } + + return 0; +} + int intel_hdmi_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) @@ -2306,11 +2369,8 @@ int intel_hdmi_compute_config(struct intel_encoder *encoder, struct drm_scdc *scdc = &connector->display_info.hdmi.scdc; struct intel_digital_connector_state *intel_conn_state = to_intel_digital_connector_state(conn_state); - int clock_8bpc = pipe_config->base.adjusted_mode.crtc_clock; - int clock_10bpc = clock_8bpc * 5 / 4; - int clock_12bpc = clock_8bpc * 3 / 2; - int desired_bpp; bool force_dvi = intel_conn_state->force_audio == HDMI_AUDIO_OFF_DVI; + int ret; if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) return -EINVAL; @@ -2332,17 +2392,11 @@ int intel_hdmi_compute_config(struct intel_encoder *encoder, intel_conn_state->broadcast_rgb == INTEL_BROADCAST_RGB_LIMITED; } - if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) { + if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) pipe_config->pixel_multiplier = 2; - clock_8bpc *= 2; - clock_10bpc *= 2; - clock_12bpc *= 2; - } if (drm_mode_is_420_only(&connector->display_info, adjusted_mode)) { - if (!intel_hdmi_ycbcr420_config(connector, pipe_config, - &clock_12bpc, &clock_10bpc, - &clock_8bpc)) { + if (!intel_hdmi_ycbcr420_config(connector, pipe_config)) { DRM_ERROR("Can't support YCBCR420 output\n"); return -EINVAL; } @@ -2359,43 +2413,9 @@ int intel_hdmi_compute_config(struct intel_encoder *encoder, intel_conn_state->force_audio == HDMI_AUDIO_ON; } - /* - * Note that g4x/vlv don't support 12bpc hdmi outputs. We also need - * to check that the higher clock still fits within limits. - */ - if (hdmi_deep_color_possible(pipe_config, 12) && - hdmi_port_clock_valid(intel_hdmi, clock_12bpc, - true, force_dvi) == MODE_OK) { - DRM_DEBUG_KMS("picking bpc to 12 for HDMI output\n"); - desired_bpp = 12*3; - - /* Need to adjust the port link by 1.5x for 12bpc. */ - pipe_config->port_clock = clock_12bpc; - } else if (hdmi_deep_color_possible(pipe_config, 10) && - hdmi_port_clock_valid(intel_hdmi, clock_10bpc, - true, force_dvi) == MODE_OK) { - DRM_DEBUG_KMS("picking bpc to 10 for HDMI output\n"); - desired_bpp = 10 * 3; - - /* Need to adjust the port link by 1.25x for 10bpc. */ - pipe_config->port_clock = clock_10bpc; - } else { - DRM_DEBUG_KMS("picking bpc to 8 for HDMI output\n"); - desired_bpp = 8*3; - - pipe_config->port_clock = clock_8bpc; - } - - if (!pipe_config->bw_constrained) { - DRM_DEBUG_KMS("forcing pipe bpp to %i for HDMI\n", desired_bpp); - pipe_config->pipe_bpp = desired_bpp; - } - - if (hdmi_port_clock_valid(intel_hdmi, pipe_config->port_clock, - false, force_dvi) != MODE_OK) { - DRM_DEBUG_KMS("unsupported HDMI clock, rejecting mode\n"); - return -EINVAL; - } + ret = intel_hdmi_compute_clock(encoder, pipe_config, force_dvi); + if (ret) + return ret; /* Set user selected PAR to incoming mode's member */ adjusted_mode->picture_aspect_ratio = conn_state->picture_aspect_ratio; From 8f5e2b306b4e26db0ec65e0a27b232b179c9122b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 2 Sep 2019 05:02:43 +0100 Subject: [PATCH 085/331] drm/i915: Restrict the aliasing-ppgtt to the size of the ggtt The aliasing-ppgtt is not allowed to be smaller than the ggtt, nor should we advertise it as being any bigger, or else we may get sued for false advertisement. Testcase: igt/gem_exec_big Fixes: 0b718ba1e884 ("drm/i915/gtt: Downgrade Cherryview back to aliasing-ppgtt") Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20190902040303.14195-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index ee51fd1a62074..906dc6fff3832 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2597,6 +2597,8 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt) GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma); ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma; + ppgtt->vm.total = ggtt->vm.total; + return 0; err_ppgtt: From 4f36ef2ee1876b2e145327f03f675a0577f258cb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 2 Sep 2019 05:02:44 +0100 Subject: [PATCH 086/331] drm/i915: Report aliasing ppgtt size as ggtt size The aliasing-ppgtt is constrained to be the same size as the Global GTT since it aliases the same address space. Simplifying gtt size reporting in this case. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20190902040303.14195-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index b8969605f4e89..f1c0e5d958f3b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -2231,8 +2231,6 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, args->size = 0; if (ctx->vm) args->value = ctx->vm->total; - else if (to_i915(dev)->ggtt.alias) - args->value = to_i915(dev)->ggtt.alias->vm.total; else args->value = to_i915(dev)->ggtt.vm.total; break; From 5a90606df7cb73eceb46897a87154e94b31af93a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 2 Sep 2019 05:02:47 +0100 Subject: [PATCH 087/331] drm/i915: Replace obj->pin_global with obj->frontbuffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit obj->pin_global was originally used as a means to keep the shrinker off the active scanout, but we use the vma->pin_count itself for that and the obj->frontbuffer to delay shrinking active framebuffers. The other role that obj->pin_global gained was for spotting display objects inside GEM and working harder to keep those coherent; for which we can again simply inspect obj->frontbuffer directly. Coming up next, we will want to manipulate the pin_global counter outside of the principle locks, so would need to make pin_global atomic. However, since obj->frontbuffer is already managed atomically, it makes sense to use that the primary key for display objects instead of having pin_global. Ville pointed out the principle difference is that obj->frontbuffer is set for as long as an intel_framebuffer is attached to an object, but obj->pin_global was only raised for as long as the object was active. In practice, this means that we consider the object as being on the scanout for longer than is strictly required, causing us to be more proactive in flushing -- though it should be true that we would have flushed eventually when the back became the front, except that on the flip path that flush is async but when hit from another ioctl it will be synchronous. v2: i915_gem_object_is_framebuffer() Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190902040303.14195-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/display/intel_display.c | 2 ++ .../gpu/drm/i915/display/intel_frontbuffer.c | 13 +++++-- drivers/gpu/drm/i915/gem/i915_gem_domain.c | 34 ++++++------------- drivers/gpu/drm/i915/gem/i915_gem_object.h | 3 +- .../gpu/drm/i915/gem/i915_gem_object_types.h | 2 -- drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 15 +++----- drivers/gpu/drm/i915/i915_debugfs.c | 12 ++----- 7 files changed, 31 insertions(+), 50 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index a03dc6eb61f84..12a94c6e3d517 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -2080,6 +2080,8 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, u32 alignment; WARN_ON(!mutex_is_locked(&dev->struct_mutex)); + if (WARN_ON(!i915_gem_object_is_framebuffer(obj))) + return ERR_PTR(-EINVAL); alignment = intel_surf_alignment(fb, 0); diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c index 719379774fa54..fc40dc1fdbcc4 100644 --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c @@ -220,11 +220,18 @@ static void frontbuffer_release(struct kref *ref) { struct intel_frontbuffer *front = container_of(ref, typeof(*front), ref); + struct drm_i915_gem_object *obj = front->obj; + struct i915_vma *vma; - front->obj->frontbuffer = NULL; - spin_unlock(&to_i915(front->obj->base.dev)->fb_tracking.lock); + spin_lock(&obj->vma.lock); + for_each_ggtt_vma(vma, obj) + vma->display_alignment = I915_GTT_MIN_ALIGNMENT; + spin_unlock(&obj->vma.lock); - i915_gem_object_put(front->obj); + obj->frontbuffer = NULL; + spin_unlock(&to_i915(obj->base.dev)->fb_tracking.lock); + + i915_gem_object_put(obj); kfree(front); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 9c58e8fac1d97..6af740a5e3db1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -27,7 +27,7 @@ static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) { - if (!READ_ONCE(obj->pin_global)) + if (!i915_gem_object_is_framebuffer(obj)) return; i915_gem_object_lock(obj); @@ -422,12 +422,8 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, assert_object_held(obj); - /* Mark the global pin early so that we account for the - * display coherency whilst setting up the cache domains. - */ - obj->pin_global++; - - /* The display engine is not coherent with the LLC cache on gen6. As + /* + * The display engine is not coherent with the LLC cache on gen6. As * a result, we make sure that the pinning that is about to occur is * done with uncached PTEs. This is lowest common denominator for all * chipsets. @@ -439,12 +435,11 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, ret = i915_gem_object_set_cache_level(obj, HAS_WT(to_i915(obj->base.dev)) ? I915_CACHE_WT : I915_CACHE_NONE); - if (ret) { - vma = ERR_PTR(ret); - goto err_unpin_global; - } + if (ret) + return ERR_PTR(ret); - /* As the user may map the buffer once pinned in the display plane + /* + * As the user may map the buffer once pinned in the display plane * (e.g. libkms for the bootup splash), we have to ensure that we * always use map_and_fenceable for all scanout buffers. However, * it may simply be too big to fit into mappable, in which case @@ -461,22 +456,19 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags); if (IS_ERR(vma)) - goto err_unpin_global; + return vma; vma->display_alignment = max_t(u64, vma->display_alignment, alignment); __i915_gem_object_flush_for_display(obj); - /* It should now be out of any other write domains, and we can update + /* + * It should now be out of any other write domains, and we can update * the domain values for our changes. */ obj->read_domains |= I915_GEM_DOMAIN_GTT; return vma; - -err_unpin_global: - obj->pin_global--; - return vma; } static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) @@ -514,12 +506,6 @@ i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) assert_object_held(obj); - if (WARN_ON(obj->pin_global == 0)) - return; - - if (--obj->pin_global == 0) - vma->display_alignment = I915_GTT_MIN_ALIGNMENT; - /* Bump the LRU to try and avoid premature eviction whilst flipping */ i915_gem_object_bump_inactive_ggtt(obj); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 5efb9936e05b5..29b9eddc4c7f5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -406,7 +406,8 @@ static inline bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) return true; - return obj->pin_global; /* currently in use by HW, keep flushed */ + /* Currently in use by HW (display engine)? Keep flushed. */ + return i915_gem_object_is_framebuffer(obj); } static inline void __start_cpu_write(struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index ede0eb4218a81..13b9dc0e1a899 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -152,8 +152,6 @@ struct drm_i915_gem_object { /** Count of VMA actually bound by this object */ atomic_t bind_count; - /** Count of how many global VMA are currently pinned for use by HW */ - unsigned int pin_global; struct { struct mutex lock; /* protects the pages and their use */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index edd21d14e64ff..4e55cfc2b0dc6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -61,7 +61,8 @@ static bool can_release_pages(struct drm_i915_gem_object *obj) if (!i915_gem_object_is_shrinkable(obj)) return false; - /* Only report true if by unbinding the object and putting its pages + /* + * Only report true if by unbinding the object and putting its pages * we can actually make forward progress towards freeing physical * pages. * @@ -72,16 +73,8 @@ static bool can_release_pages(struct drm_i915_gem_object *obj) if (atomic_read(&obj->mm.pages_pin_count) > atomic_read(&obj->bind_count)) return false; - /* If any vma are "permanently" pinned, it will prevent us from - * reclaiming the obj->mm.pages. We only allow scanout objects to claim - * a permanent pin, along with a few others like the context objects. - * To simplify the scan, and to avoid walking the list of vma under the - * object, we just check the count of its permanently pinned. - */ - if (READ_ONCE(obj->pin_global)) - return false; - - /* We can only return physical pages to the system if we can either + /* + * We can only return physical pages to the system if we can either * discard the contents (because the user has marked them as being * purgeable) or if we can move their contents out to swap. */ diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 9749369173296..9798f27a697ac 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -77,11 +77,6 @@ static int i915_capabilities(struct seq_file *m, void *data) return 0; } -static char get_pin_flag(struct drm_i915_gem_object *obj) -{ - return obj->pin_global ? 'p' : ' '; -} - static char get_tiling_flag(struct drm_i915_gem_object *obj) { switch (i915_gem_object_get_tiling(obj)) { @@ -140,9 +135,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) struct i915_vma *vma; int pin_count = 0; - seq_printf(m, "%pK: %c%c%c%c %8zdKiB %02x %02x %s%s%s", + seq_printf(m, "%pK: %c%c%c %8zdKiB %02x %02x %s%s%s", &obj->base, - get_pin_flag(obj), get_tiling_flag(obj), get_global_flag(obj), get_pin_mapped_flag(obj), @@ -221,8 +215,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) seq_printf(m, " (pinned x %d)", pin_count); if (obj->stolen) seq_printf(m, " (stolen: %08llx)", obj->stolen->start); - if (obj->pin_global) - seq_printf(m, " (global)"); + if (i915_gem_object_is_framebuffer(obj)) + seq_printf(m, " (fb)"); engine = i915_gem_object_last_write_engine(obj); if (engine) From f2690074462bdb99530179f41ae73b45e765b761 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 1 Sep 2019 12:04:31 +0100 Subject: [PATCH 088/331] drm/i915/selftests: Remove unused __engines_name() This function was never used and probably will never be used, so remove it. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20190901110431.12393-1-chris@chris-wilson.co.uk --- .../gpu/drm/i915/gem/selftests/i915_gem_context.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index da54a718c7129..dc25bcc3e3723 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -1546,21 +1546,6 @@ static int igt_vm_isolation(void *arg) return err; } -static __maybe_unused const char * -__engine_name(struct drm_i915_private *i915, intel_engine_mask_t engines) -{ - struct intel_engine_cs *engine; - intel_engine_mask_t tmp; - - if (engines == ALL_ENGINES) - return "all"; - - for_each_engine_masked(engine, i915, engines, tmp) - return engine->name; - - return "none"; -} - static bool skip_unused_engines(struct intel_context *ce, void *data) { return !ce->state; From 8f9fb61caed13e282e1e3387e64905b90cc65abd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 2 Sep 2019 05:02:46 +0100 Subject: [PATCH 089/331] drm/i915: Refresh the errno to vmf_fault translations It's been a long time since we accidentally reported -EIO upon wedging, it can now only be generated by failure to swap in a page. Signed-off-by: Chris Wilson Cc: Abdiel Janulgue Reviewed-by: Abdiel Janulgue Link: https://patchwork.freedesktop.org/patch/msgid/20190902040303.14195-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 39 +++++++++--------------- 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 261c9bd83f518..82db2b783123a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -287,6 +287,9 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) view.type = I915_GGTT_VIEW_PARTIAL; vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); } + + /* The entire mappable GGTT is pinned? Unexpected! */ + GEM_BUG_ON(vma == ERR_PTR(-ENOSPC)); } if (IS_ERR(vma)) { ret = PTR_ERR(vma); @@ -333,23 +336,19 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) i915_gem_object_unpin_pages(obj); err: switch (ret) { - case -EIO: - /* - * We eat errors when the gpu is terminally wedged to avoid - * userspace unduly crashing (gl has no provisions for mmaps to - * fail). But any other -EIO isn't ours (e.g. swap in failure) - * and so needs to be reported. - */ - if (!intel_gt_is_wedged(ggtt->vm.gt)) - return VM_FAULT_SIGBUS; - /* else, fall through */ - case -EAGAIN: - /* - * EAGAIN means the gpu is hung and we'll wait for the error - * handler to reset everything when re-faulting in - * i915_mutex_lock_interruptible. - */ + default: + WARN_ONCE(ret, "unhandled error in %s: %i\n", __func__, ret); + /* fallthrough */ + case -EIO: /* shmemfs failure from swap device */ + case -EFAULT: /* purged object */ + return VM_FAULT_SIGBUS; + + case -ENOSPC: /* shmemfs allocation failure */ + case -ENOMEM: /* our allocation failure */ + return VM_FAULT_OOM; + case 0: + case -EAGAIN: case -ERESTARTSYS: case -EINTR: case -EBUSY: @@ -358,14 +357,6 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) * already did the job. */ return VM_FAULT_NOPAGE; - case -ENOMEM: - return VM_FAULT_OOM; - case -ENOSPC: - case -EFAULT: - return VM_FAULT_SIGBUS; - default: - WARN_ONCE(ret, "unhandled error in %s: %i\n", __func__, ret); - return VM_FAULT_SIGBUS; } } From b1a4383d1e6e6003fceba9aae14bda77bfe03dc3 Mon Sep 17 00:00:00 2001 From: Swati Sharma Date: Wed, 4 Sep 2019 00:52:52 +0530 Subject: [PATCH 090/331] drm/i915/display: Add debug log for color parameters Add debug log for color related parameters like gamma_mode, gamma_enable, csc_enable, etc inside intel_dump_pipe_config(). v6: -Added debug log for color para in intel_dump_pipe_config [Jani] v7: -Split patch 3 into 4 patches v8: -Corrected alignment [Uma] Signed-off-by: Swati Sharma Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1567538578-4489-3-git-send-email-swati2.sharma@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 12a94c6e3d517..540dffa0a6015 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -12140,6 +12140,15 @@ static void intel_dump_pipe_config(const struct intel_crtc_state *pipe_config, intel_dpll_dump_hw_state(dev_priv, &pipe_config->dpll_hw_state); + if (IS_CHERRYVIEW(dev_priv)) + DRM_DEBUG_KMS("cgm_mode: 0x%x gamma_mode: 0x%x gamma_enable: %d csc_enable: %d\n", + pipe_config->cgm_mode, pipe_config->gamma_mode, + pipe_config->gamma_enable, pipe_config->csc_enable); + else + DRM_DEBUG_KMS("csc_mode: 0x%x gamma_mode: 0x%x gamma_enable: %d csc_enable: %d\n", + pipe_config->csc_mode, pipe_config->gamma_mode, + pipe_config->gamma_enable, pipe_config->csc_enable); + dump_planes: if (!state) return; From 145450f6a42df99523967a0da42bdb00dacb413b Mon Sep 17 00:00:00 2001 From: Swati Sharma Date: Wed, 4 Sep 2019 00:52:51 +0530 Subject: [PATCH 091/331] drm/i915/display: Add func to get gamma bit precision Each platform supports different gamma modes and each gamma mode has different bit precision. Here bit precision corresponds to number of bits the hw LUT supports. Add func per platform to return bit precision corresponding to gamma mode which will be later used as a parameter in lut comparison function intel_color_lut_equal(). This is done for legacy, ilk, glk and their variant platforms. v6: -Added func intel_color_get_bit_precision() to get bit precision for gamma and degamma lut readout depending upon platform and corresponding to load_luts() [Ankit] -Made patch11 as patch3 [Jani] v7: -Renamed func intel_color_get_bit_precision() to intel_color_get_gamma_bit_precision() -Added separate function/platform for gamma bit precision [Ville] -Corrected checkpatch warnings v8: -Split patch 3 into 4 separate patches v9: -Changed commit message, gave more info [Uma] -Added precision func for icl+ platform v10: -Removed precision func for chv and icl+ platforms [Jani] -Added gamma_enable check once [Jani] Signed-off-by: Swati Sharma Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1567538578-4489-2-git-send-email-swati2.sharma@intel.com --- drivers/gpu/drm/i915/display/intel_color.c | 60 ++++++++++++++++++++++ drivers/gpu/drm/i915/display/intel_color.h | 1 + 2 files changed, 61 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index 71a0201437a9a..b5c0c6583568a 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -1371,6 +1371,66 @@ static int icl_color_check(struct intel_crtc_state *crtc_state) return 0; } +static int i9xx_gamma_precision(const struct intel_crtc_state *crtc_state) +{ + switch (crtc_state->gamma_mode) { + case GAMMA_MODE_MODE_8BIT: + return 8; + case GAMMA_MODE_MODE_10BIT: + return 16; + default: + MISSING_CASE(crtc_state->gamma_mode); + return 0; + } +} + +static int ilk_gamma_precision(const struct intel_crtc_state *crtc_state) +{ + if ((crtc_state->csc_mode & CSC_POSITION_BEFORE_GAMMA) == 0) + return 0; + + switch (crtc_state->gamma_mode) { + case GAMMA_MODE_MODE_8BIT: + return 8; + case GAMMA_MODE_MODE_10BIT: + return 10; + default: + MISSING_CASE(crtc_state->gamma_mode); + return 0; + } +} + +static int glk_gamma_precision(const struct intel_crtc_state *crtc_state) +{ + switch (crtc_state->gamma_mode) { + case GAMMA_MODE_MODE_8BIT: + return 8; + case GAMMA_MODE_MODE_10BIT: + return 10; + default: + MISSING_CASE(crtc_state->gamma_mode); + return 0; + } +} + +int intel_color_get_gamma_bit_precision(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + + if (!crtc_state->gamma_enable) + return 0; + + if (HAS_GMCH(dev_priv) && !IS_CHERRYVIEW(dev_priv)) + return i9xx_gamma_precision(crtc_state); + else if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) + return glk_gamma_precision(crtc_state); + else if (IS_IRONLAKE(dev_priv)) + return ilk_gamma_precision(crtc_state); + + return 0; +} + void intel_color_init(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); diff --git a/drivers/gpu/drm/i915/display/intel_color.h b/drivers/gpu/drm/i915/display/intel_color.h index 057e8ac635559..0226d3a784b6d 100644 --- a/drivers/gpu/drm/i915/display/intel_color.h +++ b/drivers/gpu/drm/i915/display/intel_color.h @@ -14,5 +14,6 @@ int intel_color_check(struct intel_crtc_state *crtc_state); void intel_color_commit(const struct intel_crtc_state *crtc_state); void intel_color_load_luts(const struct intel_crtc_state *crtc_state); void intel_color_get_config(struct intel_crtc_state *crtc_state); +int intel_color_get_gamma_bit_precision(const struct intel_crtc_state *crtc_state); #endif /* __INTEL_COLOR_H__ */ From e9c8f591445d7b99883889caab3c5d1aafcee360 Mon Sep 17 00:00:00 2001 From: Swati Sharma Date: Wed, 4 Sep 2019 00:52:53 +0530 Subject: [PATCH 092/331] drm/i915/display: Add func to compare hw/sw gamma lut Add func intel_color_lut_equal() to compare hw/sw gamma lut values. Since hw/sw gamma lut sizes and lut entries comparison will be different for different gamma modes, add gamma mode dependent checks. v3: -Rebase v4: -Renamed intel_compare_color_lut() to intel_color_lut_equal() [Jani] -Added the default label above the correct label [Jani] -Corrected smatch warn "variable dereferenced before check" [Dan Carpenter] v5: -Added condition (!blob1 && !blob2) return true [Jani] v6: -Made patch11 as patch3 [Jani] v8: -Split patch 3 into 4 patches -Optimized blob check condition [Ville] v9: -Exclude spilt gamma mode (bdw and ivb platforms) as there is exception in way gamma values are written in hardware [Ville] -Added exception made in commit [Uma] -Dropped else, character limit and indentation [Uma] -Added multi segmented gama mode for icl+ platforms [Uma] v10: -Dropped multi segmented mode for icl+ platforms [Jani] -Removed references of sw and hw state in compare code [Jani] -Dropped inline from func [Jani] Signed-off-by: Swati Sharma Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1567538578-4489-4-git-send-email-swati2.sharma@intel.com --- drivers/gpu/drm/i915/display/intel_color.c | 72 ++++++++++++++++++++++ drivers/gpu/drm/i915/display/intel_color.h | 6 ++ 2 files changed, 78 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index b5c0c6583568a..1ab561d797066 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -1431,6 +1431,78 @@ int intel_color_get_gamma_bit_precision(const struct intel_crtc_state *crtc_stat return 0; } +static bool err_check(struct drm_color_lut *lut1, + struct drm_color_lut *lut2, u32 err) +{ + return ((abs((long)lut2->red - lut1->red)) <= err) && + ((abs((long)lut2->blue - lut1->blue)) <= err) && + ((abs((long)lut2->green - lut1->green)) <= err); +} + +static bool intel_color_lut_entry_equal(struct drm_color_lut *lut1, + struct drm_color_lut *lut2, + int lut_size, u32 err) +{ + int i; + + for (i = 0; i < lut_size; i++) { + if (!err_check(&lut1[i], &lut2[i], err)) + return false; + } + + return true; +} + +bool intel_color_lut_equal(struct drm_property_blob *blob1, + struct drm_property_blob *blob2, + u32 gamma_mode, u32 bit_precision) +{ + struct drm_color_lut *lut1, *lut2; + int lut_size1, lut_size2; + u32 err; + + if (!blob1 != !blob2) + return false; + + if (!blob1) + return true; + + lut_size1 = drm_color_lut_size(blob1); + lut_size2 = drm_color_lut_size(blob2); + + /* check sw and hw lut size */ + switch (gamma_mode) { + case GAMMA_MODE_MODE_8BIT: + case GAMMA_MODE_MODE_10BIT: + if (lut_size1 != lut_size2) + return false; + break; + default: + MISSING_CASE(gamma_mode); + return false; + } + + lut1 = blob1->data; + lut2 = blob2->data; + + err = 0xffff >> bit_precision; + + /* check sw and hw lut entry to be equal */ + switch (gamma_mode) { + case GAMMA_MODE_MODE_8BIT: + case GAMMA_MODE_MODE_10BIT: + if (!intel_color_lut_entry_equal(lut1, lut2, + lut_size2, err)) + return false; + break; + default: + MISSING_CASE(gamma_mode); + return false; + } + + return true; +} + void intel_color_init(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); diff --git a/drivers/gpu/drm/i915/display/intel_color.h b/drivers/gpu/drm/i915/display/intel_color.h index 0226d3a784b6d..173727aaa24d2 100644 --- a/drivers/gpu/drm/i915/display/intel_color.h +++ b/drivers/gpu/drm/i915/display/intel_color.h @@ -6,8 +6,11 @@ #ifndef __INTEL_COLOR_H__ #define __INTEL_COLOR_H__ +#include + struct intel_crtc_state; struct intel_crtc; +struct drm_property_blob; void intel_color_init(struct intel_crtc *crtc); int intel_color_check(struct intel_crtc_state *crtc_state); @@ -15,5 +18,8 @@ void intel_color_commit(const struct intel_crtc_state *crtc_state); void intel_color_load_luts(const struct intel_crtc_state *crtc_state); void intel_color_get_config(struct intel_crtc_state *crtc_state); int intel_color_get_gamma_bit_precision(const struct intel_crtc_state *crtc_state); +bool intel_color_lut_equal(struct drm_property_blob *blob1, + struct drm_property_blob *blob2, + u32 gamma_mode, u32 bit_precision); #endif /* __INTEL_COLOR_H__ */ From 7e764059cf708f262aa3c406af91f9f8f79e7f52 Mon Sep 17 00:00:00 2001 From: Swati Sharma Date: Wed, 4 Sep 2019 00:52:54 +0530 Subject: [PATCH 093/331] drm/i915/display: Add macro to compare gamma hw/sw lut Add macro to compare hw/sw gamma lut values. First need to check whether hw/sw gamma mode matches or not. If not no need to compare lut values, if matches then only compare lut entries. v5: -Called PIPE_CONF_CHECK_COLOR_LUT inside if (!adjust) [Jani] -Added #undef PIPE_CONF_CHECK_COLOR_LUT [Jani] v8: -Added check for gamma mode before gamma lut entry comparison [Jani] -Split patch 3 into 4 patches Signed-off-by: Swati Sharma Reviewed-by: Uma Shankar Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1567538578-4489-5-git-send-email-swati2.sharma@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 25 ++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 540dffa0a6015..06cf2171474d8 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -12531,6 +12531,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, { struct drm_i915_private *dev_priv = to_i915(current_config->base.crtc->dev); bool ret = true; + u32 bp_gamma = 0; bool fixup_inherited = fastset && (current_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED) && !(pipe_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED); @@ -12682,6 +12683,24 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, } \ } while (0) +#define PIPE_CONF_CHECK_COLOR_LUT(name1, name2, bit_precision) do { \ + if (current_config->name1 != pipe_config->name1) { \ + pipe_config_mismatch(fastset, __stringify(name1), \ + "(expected %i, found %i, won't compare lut values)\n", \ + current_config->name1, \ + pipe_config->name1); \ + ret = false;\ + } else { \ + if (!intel_color_lut_equal(current_config->name2, \ + pipe_config->name2, pipe_config->name1, \ + bit_precision)) { \ + pipe_config_mismatch(fastset, __stringify(name2), \ + "hw_state doesn't match sw_state\n"); \ + ret = false; \ + } \ + } \ +} while (0) + #define PIPE_CONF_QUIRK(quirk) \ ((current_config->quirks | pipe_config->quirks) & (quirk)) @@ -12777,6 +12796,11 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, PIPE_CONF_CHECK_X(csc_mode); PIPE_CONF_CHECK_BOOL(gamma_enable); PIPE_CONF_CHECK_BOOL(csc_enable); + + bp_gamma = intel_color_get_gamma_bit_precision(pipe_config); + if (bp_gamma) + PIPE_CONF_CHECK_COLOR_LUT(gamma_mode, base.gamma_lut, bp_gamma); + } PIPE_CONF_CHECK_BOOL(double_wide); @@ -12839,6 +12863,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #undef PIPE_CONF_CHECK_P #undef PIPE_CONF_CHECK_FLAGS #undef PIPE_CONF_CHECK_CLOCK_FUZZY +#undef PIPE_CONF_CHECK_COLOR_LUT #undef PIPE_CONF_QUIRK return ret; From 1af22383829864299102ca0c2eab458f755a9971 Mon Sep 17 00:00:00 2001 From: Swati Sharma Date: Wed, 4 Sep 2019 00:52:55 +0530 Subject: [PATCH 094/331] drm/i915/display: Extract i9xx_read_luts() For the legacy(gen < 4) gamma, add hw read out to create hw blob of gamma lut values. Also, add function intel_color_lut_pack to convert hw value with given bit precision to lut property val. v4: -No need to initialize *blob [Jani] -Removed right shifts [Jani] -Dropped dev local var [Jani] v5: -Returned blob instead of assigning it internally within the function [Ville] -Renamed function i9xx_get_color_config() to i9xx_read_luts() -Renamed i9xx_get_config_internal() to i9xx_read_lut_8() [Ville] v9: -Change in commit message [Jani, Uma] -Wrap commit within 75 characters [Uma] -Use macro for 256 [Uma] -Made read func para as const [Ville, Uma] v10: -Made i9xx_read_luts() static [Jani] Signed-off-by: Swati Sharma Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1567538578-4489-6-git-send-email-swati2.sharma@intel.com --- drivers/gpu/drm/i915/display/intel_color.c | 54 ++++++++++++++++++++++ drivers/gpu/drm/i915/i915_reg.h | 3 ++ 2 files changed, 57 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index 1ab561d797066..55076def6a98f 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -1503,6 +1503,59 @@ bool intel_color_lut_equal(struct drm_property_blob *blob1, return true; } +/* convert hw value with given bit_precision to lut property val */ +static u32 intel_color_lut_pack(u32 val, u32 bit_precision) +{ + u32 max = 0xffff >> (16 - bit_precision); + + val = clamp_val(val, 0, max); + + if (bit_precision < 16) + val <<= 16 - bit_precision; + + return val; +} + +static struct drm_property_blob * +i9xx_read_lut_8(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; + struct drm_property_blob *blob; + struct drm_color_lut *blob_data; + u32 i, val; + + blob = drm_property_create_blob(&dev_priv->drm, + sizeof(struct drm_color_lut) * LEGACY_LUT_LENGTH, + NULL); + if (IS_ERR(blob)) + return NULL; + + blob_data = blob->data; + + for (i = 0; i < LEGACY_LUT_LENGTH; i++) { + if (HAS_GMCH(dev_priv)) + val = I915_READ(PALETTE(pipe, i)); + else + val = I915_READ(LGC_PALETTE(pipe, i)); + + blob_data[i].red = intel_color_lut_pack(REG_FIELD_GET( + LGC_PALETTE_RED_MASK, val), 8); + blob_data[i].green = intel_color_lut_pack(REG_FIELD_GET( + LGC_PALETTE_GREEN_MASK, val), 8); + blob_data[i].blue = intel_color_lut_pack(REG_FIELD_GET( + LGC_PALETTE_BLUE_MASK, val), 8); + } + + return blob; +} + +static void i9xx_read_luts(struct intel_crtc_state *crtc_state) +{ + crtc_state->base.gamma_lut = i9xx_read_lut_8(crtc_state); +} + void intel_color_init(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); @@ -1523,6 +1576,7 @@ void intel_color_init(struct intel_crtc *crtc) dev_priv->display.color_check = i9xx_color_check; dev_priv->display.color_commit = i9xx_color_commit; dev_priv->display.load_luts = i9xx_load_luts; + dev_priv->display.read_luts = i9xx_read_luts; } } else { if (INTEL_GEN(dev_priv) >= 11) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 6c43b8c583bbb..aac717fa4e25f 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7192,6 +7192,9 @@ enum { /* legacy palette */ #define _LGC_PALETTE_A 0x4a000 #define _LGC_PALETTE_B 0x4a800 +#define LGC_PALETTE_RED_MASK REG_GENMASK(23, 16) +#define LGC_PALETTE_GREEN_MASK REG_GENMASK(15, 8) +#define LGC_PALETTE_BLUE_MASK REG_GENMASK(7, 0) #define LGC_PALETTE(pipe, i) _MMIO(_PIPE(pipe, _LGC_PALETTE_A, _LGC_PALETTE_B) + (i) * 4) /* ilk/snb precision palette */ From 6b97b118d4d542c7bc25b725c6de3947fffb921b Mon Sep 17 00:00:00 2001 From: Swati Sharma Date: Wed, 4 Sep 2019 00:52:56 +0530 Subject: [PATCH 095/331] drm/i915/display: Extract ilk_read_luts() For ilk, add hw read out to create hw blob of gamma lut values. v4: -No need to initialize *blob [Jani] -Removed right shifts [Jani] -Dropped dev local var [Jani] v5: -Returned blob instead of assigning it internally within the function [Ville] -Renamed ilk_get_color_config() to ilk_read_luts() [Ville] v9: -80 character limit [Uma] -Made read func para as const [Ville, Uma] -Renamed ilk_read_gamma_lut() to ilk_read_lut_10() [Uma, Ville] v10: -Made ilk_read_luts() static [Jani] -ilk_load_lut_10 has lut_size, not (lut_size - 1) [Jani] Signed-off-by: Swati Sharma Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1567538578-4489-7-git-send-email-swati2.sharma@intel.com --- drivers/gpu/drm/i915/display/intel_color.c | 45 +++++++++++++++++++++- drivers/gpu/drm/i915/i915_reg.h | 3 ++ 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index 55076def6a98f..80f82b2bd2848 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -1556,6 +1556,47 @@ static void i9xx_read_luts(struct intel_crtc_state *crtc_state) crtc_state->base.gamma_lut = i9xx_read_lut_8(crtc_state); } +static struct drm_property_blob * +ilk_read_lut_10(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + u32 lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; + enum pipe pipe = crtc->pipe; + struct drm_property_blob *blob; + struct drm_color_lut *blob_data; + u32 i, val; + + blob = drm_property_create_blob(&dev_priv->drm, + sizeof(struct drm_color_lut) * lut_size, + NULL); + if (IS_ERR(blob)) + return NULL; + + blob_data = blob->data; + + for (i = 0; i < lut_size; i++) { + val = I915_READ(PREC_PALETTE(pipe, i)); + + blob_data[i].red = intel_color_lut_pack(REG_FIELD_GET( + PREC_PALETTE_RED_MASK, val), 10); + blob_data[i].green = intel_color_lut_pack(REG_FIELD_GET( + PREC_PALETTE_GREEN_MASK, val), 10); + blob_data[i].blue = intel_color_lut_pack(REG_FIELD_GET( + PREC_PALETTE_BLUE_MASK, val), 10); + } + + return blob; +} + +static void ilk_read_luts(struct intel_crtc_state *crtc_state) +{ + if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) + crtc_state->base.gamma_lut = i9xx_read_lut_8(crtc_state); + else + crtc_state->base.gamma_lut = ilk_read_lut_10(crtc_state); +} + void intel_color_init(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); @@ -1603,8 +1644,10 @@ void intel_color_init(struct intel_crtc *crtc) dev_priv->display.load_luts = bdw_load_luts; else if (INTEL_GEN(dev_priv) >= 7) dev_priv->display.load_luts = ivb_load_luts; - else + else { dev_priv->display.load_luts = ilk_load_luts; + dev_priv->display.read_luts = ilk_read_luts; + } } drm_crtc_enable_color_mgmt(&crtc->base, diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index aac717fa4e25f..6ebb76a430e46 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7200,6 +7200,9 @@ enum { /* ilk/snb precision palette */ #define _PREC_PALETTE_A 0x4b000 #define _PREC_PALETTE_B 0x4c000 +#define PREC_PALETTE_RED_MASK REG_GENMASK(29, 20) +#define PREC_PALETTE_GREEN_MASK REG_GENMASK(19, 10) +#define PREC_PALETTE_BLUE_MASK REG_GENMASK(9, 0) #define PREC_PALETTE(pipe, i) _MMIO(_PIPE(pipe, _PREC_PALETTE_A, _PREC_PALETTE_B) + (i) * 4) #define _PREC_PIPEAGCMAX 0x4d000 From 4bb6a9d5d9a8289673c4cb0786d44be8a63c21db Mon Sep 17 00:00:00 2001 From: Swati Sharma Date: Wed, 4 Sep 2019 00:52:57 +0530 Subject: [PATCH 096/331] drm/i915/display: Extract glk_read_luts() For glk, add hw read out to create hw blob of gamma lut values. v4: -No need to initialize *blob [Jani] -Removed right shifts [Jani] -Dropped dev local var [Jani] v5: -Returned blob instead of assigning it internally within the function [Ville] -Renamed glk_get_color_config() to glk_read_luts() [Ville] -Added degamma validation [Ville] v9: -80 character limit [Uma] -Made read func para as const [Ville, Uma] Signed-off-by: Swati Sharma Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1567538578-4489-8-git-send-email-swati2.sharma@intel.com --- drivers/gpu/drm/i915/display/intel_color.c | 51 +++++++++++++++++++++- drivers/gpu/drm/i915/i915_reg.h | 3 ++ 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index 80f82b2bd2848..6d641e1598993 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -1597,6 +1597,52 @@ static void ilk_read_luts(struct intel_crtc_state *crtc_state) crtc_state->base.gamma_lut = ilk_read_lut_10(crtc_state); } +static struct drm_property_blob * +glk_read_lut_10(const struct intel_crtc_state *crtc_state, u32 prec_index) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + int hw_lut_size = ivb_lut_10_size(prec_index); + enum pipe pipe = crtc->pipe; + struct drm_property_blob *blob; + struct drm_color_lut *blob_data; + u32 i, val; + + I915_WRITE(PREC_PAL_INDEX(pipe), prec_index | + PAL_PREC_AUTO_INCREMENT); + + blob = drm_property_create_blob(&dev_priv->drm, + sizeof(struct drm_color_lut) * hw_lut_size, + NULL); + if (IS_ERR(blob)) + return NULL; + + blob_data = blob->data; + + for (i = 0; i < hw_lut_size; i++) { + val = I915_READ(PREC_PAL_DATA(pipe)); + + blob_data[i].red = intel_color_lut_pack(REG_FIELD_GET( + PREC_PAL_DATA_RED_MASK, val), 10); + blob_data[i].green = intel_color_lut_pack(REG_FIELD_GET( + PREC_PAL_DATA_GREEN_MASK, val), 10); + blob_data[i].blue = intel_color_lut_pack(REG_FIELD_GET( + PREC_PAL_DATA_BLUE_MASK, val), 10); + } + + I915_WRITE(PREC_PAL_INDEX(pipe), 0); + + return blob; +} + +static void glk_read_luts(struct intel_crtc_state *crtc_state) +{ + if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) + crtc_state->base.gamma_lut = i9xx_read_lut_8(crtc_state); + else + crtc_state->base.gamma_lut = glk_read_lut_10(crtc_state, PAL_PREC_INDEX_VALUE(0)); +} + void intel_color_init(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); @@ -1638,9 +1684,10 @@ void intel_color_init(struct intel_crtc *crtc) if (INTEL_GEN(dev_priv) >= 11) dev_priv->display.load_luts = icl_load_luts; - else if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) + else if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) { dev_priv->display.load_luts = glk_load_luts; - else if (INTEL_GEN(dev_priv) >= 8) + dev_priv->display.read_luts = glk_read_luts; + } else if (INTEL_GEN(dev_priv) >= 8) dev_priv->display.load_luts = bdw_load_luts; else if (INTEL_GEN(dev_priv) >= 7) dev_priv->display.load_luts = ivb_load_luts; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 6ebb76a430e46..45ed96d7c5994 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -10351,6 +10351,9 @@ enum skl_power_gate { #define _PAL_PREC_GC_MAX_A 0x4A410 #define _PAL_PREC_GC_MAX_B 0x4AC10 #define _PAL_PREC_GC_MAX_C 0x4B410 +#define PREC_PAL_DATA_RED_MASK REG_GENMASK(29, 20) +#define PREC_PAL_DATA_GREEN_MASK REG_GENMASK(19, 10) +#define PREC_PAL_DATA_BLUE_MASK REG_GENMASK(9, 0) #define _PAL_PREC_EXT_GC_MAX_A 0x4A420 #define _PAL_PREC_EXT_GC_MAX_B 0x4AC20 #define _PAL_PREC_EXT_GC_MAX_C 0x4B420 From 9d7b01e93526efe79dbf75b69cc5972b5a4f7b37 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 4 Sep 2019 11:07:07 +0100 Subject: [PATCH 097/331] drm/i915: Restore relaxed padding (OCL_OOB_SUPPRES_ENABLE) for skl+ This bit was fliped on for "syncing dependencies between camera and graphics". BSpec has no recollection why, and it is causing unrecoverable GPU hangs with Vulkan compute workloads. From BSpec, setting bit5 to 0 enables relaxed padding requirements for buffers, 1D and 2D non-array, non-MSAA, non-mip-mapped linear surfaces; and *must* be set to 0h on skl+ to ensure "Out of Bounds" case is suppressed. Reported-by: Jason Ekstrand Suggested-by: Jason Ekstrand Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=110998 Fixes: 8424171e135c ("drm/i915/gen9: h/w w/a: syncing dependencies between camera and graphics") Signed-off-by: Chris Wilson Tested-by: denys.kostin@globallogic.com Cc: Jason Ekstrand Cc: Mika Kuoppala Cc: # v4.1+ Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190904100707.7377-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 8639fcccdb420..243d3f77be13f 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -297,11 +297,6 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine, FLOW_CONTROL_ENABLE | PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); - /* Syncing dependencies between camera and graphics:skl,bxt,kbl */ - if (!IS_COFFEELAKE(i915)) - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, - GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC); - /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */ /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */ WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, From e838bfa8e170415fa3cc8e83ecb171e809c0c422 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 3 Sep 2019 18:40:18 +0300 Subject: [PATCH 098/331] Revert "drm/i915: Fix DP-MST crtc_mask" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 4eaceea3a00f8e936a7f48dcd0c975a57f88930f. Several userspace clients (modesetting ddx and mutter+wayland at least) handle encoder.possible_crtcs incorrectly. What they essentially do is the following: possible_crtcs = ~0; for_each_possible_encoder(connector) possible_crtcs &= encoder->possible_crtcs; Ie. they calculate the intersection of the possible_crtcs for the connector when they really should be calculating the union instead. In our case each MST encoder now has just one unique bit set, and so the intersection is always zero. The end result is that MST connectors can't be lit up because no crtc can be found to drive them. I've submitted a fix for the modesetting ddx [1], and complained on #wayland about mutter, so hopefully the situation will improve in the future. In the meantime we have regression, and so must go back to the old way of misconfiguring possible_crtcs in the kernel. [1] https://gitlab.freedesktop.org/xorg/xserver/merge_requests/277 Cc: Jonas Ådahl Cc: Stanislav Lisovskiy Cc: Lionel Landwerlin Cc: Dhinakaran Pandiyan Cc: Lucas De Marchi Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111507 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190903154018.26357-1-ville.syrjala@linux.intel.com Reviewed-by: José Roberto de Souza --- drivers/gpu/drm/i915/display/intel_dp_mst.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 6df240a01b8c3..37366f81255b4 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -615,7 +615,7 @@ intel_dp_create_fake_mst_encoder(struct intel_digital_port *intel_dig_port, enum intel_encoder->type = INTEL_OUTPUT_DP_MST; intel_encoder->power_domain = intel_dig_port->base.power_domain; intel_encoder->port = intel_dig_port->base.port; - intel_encoder->crtc_mask = BIT(pipe); + intel_encoder->crtc_mask = 0x7; intel_encoder->cloneable = 0; intel_encoder->compute_config = intel_dp_mst_compute_config; From ab016914984e2a7405756dfc8a8e4cb54a4c1b48 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 20 Aug 2019 21:54:51 +0200 Subject: [PATCH 099/331] drm/i915: disable set/get_tiling ioctl on gen12+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cpu (de)tiler hw is gone, this stopped being useful. Plus it never supported any of the fancy new tiling formats, which means userspace also stopped using the magic side-channel this provides. This would totally break a lot of the igts, but they're already broken for the same reasons as userspace on gen12 would be. v2: Look at ggtt->num_fences instead, that also avoids the need for a comment (Chris). This also means that gen12 support really needs to make sure num_fences is set to 0. There is a patch for that, but it checks for HAS_MAPPABLE_APERTURE, which I'm not sure is the right thing really. Adding relevant people. Cc: Daniele Ceraolo Spurio Cc: Stuart Summers Cc: Matthew Auld Cc: Kenneth Graunke Cc: Jason Ekstrand Cc: Chris Wilson Cc: Lucas De Marchi Signed-off-by: Daniel Vetter Signed-off-by: Daniel Vetter Signed-off-by: José Roberto de Souza Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20190820195451.15671-1-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/gem/i915_gem_tiling.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c index ca0c2f4517428..e5d1ae8d4dbad 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c @@ -313,10 +313,14 @@ int i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { + struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_set_tiling *args = data; struct drm_i915_gem_object *obj; int err; + if (!dev_priv->ggtt.num_fences) + return -EOPNOTSUPP; + obj = i915_gem_object_lookup(file, args->handle); if (!obj) return -ENOENT; @@ -402,6 +406,9 @@ i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_object *obj; int err = -ENOENT; + if (!dev_priv->ggtt.num_fences) + return -EOPNOTSUPP; + rcu_read_lock(); obj = i915_gem_object_lookup_rcu(file, args->handle); if (obj) { From 2f3b87124b9f08518b43abf2266035dd22fdbd3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Wed, 4 Sep 2019 14:34:14 -0700 Subject: [PATCH 100/331] drm/i915/psr: Only handle interruptions of the transcoder in use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It was enabling and checking PSR interruptions in every transcoder while it should keep the interruptions on the non-used transcoders masked. While doing this it gives us trouble on Tiger Lake if we are reading/writing to registers of disabled transcoders since from gen12 onwards the registers are relative to the transcoder. Instead of forcing them ON to access those registers, just avoid the accesses as they are not needed. v2 (Lucas): - Explain why we can't keep accessing all transcoders - Remove TODO about extending the irq handling to multiple instances: when/if implementing multiple instances it's pretty clear by the singleton psr that it needs to be extended - Fix intel_psr_debug_set() calling psr_irq_control() with psr.transcoder not set yet (from Imre). Now we only set the debug register right away if psr is already enabled. Otherwise we just record the value to be set when enabling the source. - Do not depend on the value of TRANSCODER_A. Just be relative to it (from Imre) - handle psr error last so we don't schedule the work before handling the other flags v3: - Adding a warning about setting reserverd bits on EDP_PSR_IMR Cc: Imre Deak Cc: Dhinakaran Pandiyan Signed-off-by: José Roberto de Souza Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20190904213419.27547-2-jose.souza@intel.com --- drivers/gpu/drm/i915/display/intel_psr.c | 142 +++++++++-------------- drivers/gpu/drm/i915/i915_reg.h | 13 +-- 2 files changed, 60 insertions(+), 95 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 629b8b98a97f1..1d99ffeaa36ad 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -88,48 +88,20 @@ static bool intel_psr2_enabled(struct drm_i915_private *dev_priv, } } -static int edp_psr_shift(enum transcoder cpu_transcoder) +static void psr_irq_control(struct drm_i915_private *dev_priv) { - switch (cpu_transcoder) { - case TRANSCODER_A: - return EDP_PSR_TRANSCODER_A_SHIFT; - case TRANSCODER_B: - return EDP_PSR_TRANSCODER_B_SHIFT; - case TRANSCODER_C: - return EDP_PSR_TRANSCODER_C_SHIFT; - default: - MISSING_CASE(cpu_transcoder); - /* fallthrough */ - case TRANSCODER_EDP: - return EDP_PSR_TRANSCODER_EDP_SHIFT; - } -} - -static void psr_irq_control(struct drm_i915_private *dev_priv, u32 debug) -{ - u32 debug_mask, mask; - enum transcoder cpu_transcoder; - u32 transcoders = BIT(TRANSCODER_EDP); - - if (INTEL_GEN(dev_priv) >= 8) - transcoders |= BIT(TRANSCODER_A) | - BIT(TRANSCODER_B) | - BIT(TRANSCODER_C); - - debug_mask = 0; - mask = 0; - for_each_cpu_transcoder_masked(dev_priv, cpu_transcoder, transcoders) { - int shift = edp_psr_shift(cpu_transcoder); - - mask |= EDP_PSR_ERROR(shift); - debug_mask |= EDP_PSR_POST_EXIT(shift) | - EDP_PSR_PRE_ENTRY(shift); - } - - if (debug & I915_PSR_DEBUG_IRQ) - mask |= debug_mask; - - I915_WRITE(EDP_PSR_IMR, ~mask); + enum transcoder trans = dev_priv->psr.transcoder; + u32 val, mask; + + mask = EDP_PSR_ERROR(trans); + if (dev_priv->psr.debug & I915_PSR_DEBUG_IRQ) + mask |= EDP_PSR_POST_EXIT(trans) | EDP_PSR_PRE_ENTRY(trans); + + /* Warning: it is masking/setting reserved bits too */ + val = I915_READ(EDP_PSR_IMR); + val &= ~EDP_PSR_TRANS_MASK(trans); + val |= ~mask; + I915_WRITE(EDP_PSR_IMR, val); } static void psr_event_print(u32 val, bool psr2_enabled) @@ -171,60 +143,48 @@ static void psr_event_print(u32 val, bool psr2_enabled) void intel_psr_irq_handler(struct drm_i915_private *dev_priv, u32 psr_iir) { - u32 transcoders = BIT(TRANSCODER_EDP); - enum transcoder cpu_transcoder; + enum transcoder cpu_transcoder = dev_priv->psr.transcoder; ktime_t time_ns = ktime_get(); - u32 mask = 0; - - if (INTEL_GEN(dev_priv) >= 8) - transcoders |= BIT(TRANSCODER_A) | - BIT(TRANSCODER_B) | - BIT(TRANSCODER_C); - - for_each_cpu_transcoder_masked(dev_priv, cpu_transcoder, transcoders) { - int shift = edp_psr_shift(cpu_transcoder); - if (psr_iir & EDP_PSR_ERROR(shift)) { - DRM_WARN("[transcoder %s] PSR aux error\n", - transcoder_name(cpu_transcoder)); + if (psr_iir & EDP_PSR_PRE_ENTRY(cpu_transcoder)) { + dev_priv->psr.last_entry_attempt = time_ns; + DRM_DEBUG_KMS("[transcoder %s] PSR entry attempt in 2 vblanks\n", + transcoder_name(cpu_transcoder)); + } - dev_priv->psr.irq_aux_error = true; + if (psr_iir & EDP_PSR_POST_EXIT(cpu_transcoder)) { + dev_priv->psr.last_exit = time_ns; + DRM_DEBUG_KMS("[transcoder %s] PSR exit completed\n", + transcoder_name(cpu_transcoder)); - /* - * If this interruption is not masked it will keep - * interrupting so fast that it prevents the scheduled - * work to run. - * Also after a PSR error, we don't want to arm PSR - * again so we don't care about unmask the interruption - * or unset irq_aux_error. - */ - mask |= EDP_PSR_ERROR(shift); - } + if (INTEL_GEN(dev_priv) >= 9) { + u32 val = I915_READ(PSR_EVENT(cpu_transcoder)); + bool psr2_enabled = dev_priv->psr.psr2_enabled; - if (psr_iir & EDP_PSR_PRE_ENTRY(shift)) { - dev_priv->psr.last_entry_attempt = time_ns; - DRM_DEBUG_KMS("[transcoder %s] PSR entry attempt in 2 vblanks\n", - transcoder_name(cpu_transcoder)); + I915_WRITE(PSR_EVENT(cpu_transcoder), val); + psr_event_print(val, psr2_enabled); } + } - if (psr_iir & EDP_PSR_POST_EXIT(shift)) { - dev_priv->psr.last_exit = time_ns; - DRM_DEBUG_KMS("[transcoder %s] PSR exit completed\n", - transcoder_name(cpu_transcoder)); + if (psr_iir & EDP_PSR_ERROR(cpu_transcoder)) { + u32 val; - if (INTEL_GEN(dev_priv) >= 9) { - u32 val = I915_READ(PSR_EVENT(cpu_transcoder)); - bool psr2_enabled = dev_priv->psr.psr2_enabled; + DRM_WARN("[transcoder %s] PSR aux error\n", + transcoder_name(cpu_transcoder)); - I915_WRITE(PSR_EVENT(cpu_transcoder), val); - psr_event_print(val, psr2_enabled); - } - } - } + dev_priv->psr.irq_aux_error = true; - if (mask) { - mask |= I915_READ(EDP_PSR_IMR); - I915_WRITE(EDP_PSR_IMR, mask); + /* + * If this interruption is not masked it will keep + * interrupting so fast that it prevents the scheduled + * work to run. + * Also after a PSR error, we don't want to arm PSR + * again so we don't care about unmask the interruption + * or unset irq_aux_error. + */ + val = I915_READ(EDP_PSR_IMR); + val |= EDP_PSR_ERROR(cpu_transcoder); + I915_WRITE(EDP_PSR_IMR, val); schedule_work(&dev_priv->psr.work); } @@ -747,7 +707,7 @@ static void intel_psr_enable_source(struct intel_dp *intel_dp, I915_WRITE(EDP_PSR_DEBUG(dev_priv->psr.transcoder), mask); - psr_irq_control(dev_priv, dev_priv->psr.debug); + psr_irq_control(dev_priv); } static void intel_psr_enable_locked(struct drm_i915_private *dev_priv, @@ -772,7 +732,7 @@ static void intel_psr_enable_locked(struct drm_i915_private *dev_priv, * to avoid any rendering problems. */ val = I915_READ(EDP_PSR_IIR); - val &= EDP_PSR_ERROR(edp_psr_shift(dev_priv->psr.transcoder)); + val &= EDP_PSR_ERROR(dev_priv->psr.transcoder); if (val) { dev_priv->psr.sink_not_reliable = true; DRM_DEBUG_KMS("PSR interruption error set, not enabling PSR\n"); @@ -1120,7 +1080,13 @@ int intel_psr_debug_set(struct drm_i915_private *dev_priv, u64 val) old_mode = dev_priv->psr.debug & I915_PSR_DEBUG_MODE_MASK; dev_priv->psr.debug = val; - psr_irq_control(dev_priv, dev_priv->psr.debug); + + /* + * Do it right away if it's already enabled, otherwise it will be done + * when enabling the source. + */ + if (dev_priv->psr.enabled) + psr_irq_control(dev_priv); mutex_unlock(&dev_priv->psr.lock); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 45ed96d7c5994..81f68455d4923 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4225,13 +4225,12 @@ enum { /* Bspec claims those aren't shifted but stay at 0x64800 */ #define EDP_PSR_IMR _MMIO(0x64834) #define EDP_PSR_IIR _MMIO(0x64838) -#define EDP_PSR_ERROR(shift) (1 << ((shift) + 2)) -#define EDP_PSR_POST_EXIT(shift) (1 << ((shift) + 1)) -#define EDP_PSR_PRE_ENTRY(shift) (1 << (shift)) -#define EDP_PSR_TRANSCODER_C_SHIFT 24 -#define EDP_PSR_TRANSCODER_B_SHIFT 16 -#define EDP_PSR_TRANSCODER_A_SHIFT 8 -#define EDP_PSR_TRANSCODER_EDP_SHIFT 0 +#define _EDP_PSR_TRANS_SHIFT(trans) ((trans) == TRANSCODER_EDP ? \ + 0 : ((trans) - TRANSCODER_A + 1) * 8) +#define EDP_PSR_TRANS_MASK(trans) (0x7 << _EDP_PSR_TRANS_SHIFT(trans)) +#define EDP_PSR_ERROR(trans) (0x4 << _EDP_PSR_TRANS_SHIFT(trans)) +#define EDP_PSR_POST_EXIT(trans) (0x2 << _EDP_PSR_TRANS_SHIFT(trans)) +#define EDP_PSR_PRE_ENTRY(trans) (0x1 << _EDP_PSR_TRANS_SHIFT(trans)) #define _SRD_AUX_CTL_A 0x60810 #define _SRD_AUX_CTL_EDP 0x6f810 From 8241cfbe67f4082eee5fc72e5a8025c5b58c2ddf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Wed, 4 Sep 2019 14:34:15 -0700 Subject: [PATCH 101/331] drm/i915/tgl: Access the right register when handling PSR interruptions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For older gens PSR IIR and IMR have fixed addresses. From TGL onwards those registers moved to each transcoder offset. The bits for the registers are defined without an offset per transcoder as right now we have one register per transcoder. So add a fake "trans_shift" when calculating the bits offsets: it will be 0 for gen12+ and psr.transcoder otherwise. v2 (Lucas): change the implementation to use trans_shift instead of getting each bit value with a different macro Cc: Imre Deak Cc: Dhinakaran Pandiyan Cc: Rodrigo Vivi Signed-off-by: José Roberto de Souza Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20190904213419.27547-3-jose.souza@intel.com --- drivers/gpu/drm/i915/display/intel_psr.c | 60 ++++++++++++++++++------ drivers/gpu/drm/i915/i915_irq.c | 51 +++++++++++++++++--- drivers/gpu/drm/i915/i915_reg.h | 10 +++- 3 files changed, 99 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 1d99ffeaa36ad..b3c7eef53bf3a 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -90,18 +90,33 @@ static bool intel_psr2_enabled(struct drm_i915_private *dev_priv, static void psr_irq_control(struct drm_i915_private *dev_priv) { - enum transcoder trans = dev_priv->psr.transcoder; - u32 val, mask; + enum transcoder trans_shift; + u32 mask, val; + i915_reg_t imr_reg; - mask = EDP_PSR_ERROR(trans); + /* + * gen12+ has registers relative to transcoder and one per transcoder + * using the same bit definition: handle it as TRANSCODER_EDP to force + * 0 shift in bit definition + */ + if (INTEL_GEN(dev_priv) >= 12) { + trans_shift = 0; + imr_reg = TRANS_PSR_IMR(dev_priv->psr.transcoder); + } else { + trans_shift = dev_priv->psr.transcoder; + imr_reg = EDP_PSR_IMR; + } + + mask = EDP_PSR_ERROR(trans_shift); if (dev_priv->psr.debug & I915_PSR_DEBUG_IRQ) - mask |= EDP_PSR_POST_EXIT(trans) | EDP_PSR_PRE_ENTRY(trans); + mask |= EDP_PSR_POST_EXIT(trans_shift) | + EDP_PSR_PRE_ENTRY(trans_shift); /* Warning: it is masking/setting reserved bits too */ - val = I915_READ(EDP_PSR_IMR); - val &= ~EDP_PSR_TRANS_MASK(trans); + val = I915_READ(imr_reg); + val &= ~EDP_PSR_TRANS_MASK(trans_shift); val |= ~mask; - I915_WRITE(EDP_PSR_IMR, val); + I915_WRITE(imr_reg, val); } static void psr_event_print(u32 val, bool psr2_enabled) @@ -144,15 +159,25 @@ static void psr_event_print(u32 val, bool psr2_enabled) void intel_psr_irq_handler(struct drm_i915_private *dev_priv, u32 psr_iir) { enum transcoder cpu_transcoder = dev_priv->psr.transcoder; + enum transcoder trans_shift; + i915_reg_t imr_reg; ktime_t time_ns = ktime_get(); - if (psr_iir & EDP_PSR_PRE_ENTRY(cpu_transcoder)) { + if (INTEL_GEN(dev_priv) >= 12) { + trans_shift = 0; + imr_reg = TRANS_PSR_IMR(dev_priv->psr.transcoder); + } else { + trans_shift = dev_priv->psr.transcoder; + imr_reg = EDP_PSR_IMR; + } + + if (psr_iir & EDP_PSR_PRE_ENTRY(trans_shift)) { dev_priv->psr.last_entry_attempt = time_ns; DRM_DEBUG_KMS("[transcoder %s] PSR entry attempt in 2 vblanks\n", transcoder_name(cpu_transcoder)); } - if (psr_iir & EDP_PSR_POST_EXIT(cpu_transcoder)) { + if (psr_iir & EDP_PSR_POST_EXIT(trans_shift)) { dev_priv->psr.last_exit = time_ns; DRM_DEBUG_KMS("[transcoder %s] PSR exit completed\n", transcoder_name(cpu_transcoder)); @@ -166,7 +191,7 @@ void intel_psr_irq_handler(struct drm_i915_private *dev_priv, u32 psr_iir) } } - if (psr_iir & EDP_PSR_ERROR(cpu_transcoder)) { + if (psr_iir & EDP_PSR_ERROR(trans_shift)) { u32 val; DRM_WARN("[transcoder %s] PSR aux error\n", @@ -182,9 +207,9 @@ void intel_psr_irq_handler(struct drm_i915_private *dev_priv, u32 psr_iir) * again so we don't care about unmask the interruption * or unset irq_aux_error. */ - val = I915_READ(EDP_PSR_IMR); - val |= EDP_PSR_ERROR(cpu_transcoder); - I915_WRITE(EDP_PSR_IMR, val); + val = I915_READ(imr_reg); + val |= EDP_PSR_ERROR(trans_shift); + I915_WRITE(imr_reg, val); schedule_work(&dev_priv->psr.work); } @@ -731,8 +756,13 @@ static void intel_psr_enable_locked(struct drm_i915_private *dev_priv, * first time that PSR HW tries to activate so lets keep PSR disabled * to avoid any rendering problems. */ - val = I915_READ(EDP_PSR_IIR); - val &= EDP_PSR_ERROR(dev_priv->psr.transcoder); + if (INTEL_GEN(dev_priv) >= 12) { + val = I915_READ(TRANS_PSR_IIR(dev_priv->psr.transcoder)); + val &= EDP_PSR_ERROR(0); + } else { + val = I915_READ(EDP_PSR_IIR); + val &= EDP_PSR_ERROR(dev_priv->psr.transcoder); + } if (val) { dev_priv->psr.sink_not_reliable = true; DRM_DEBUG_KMS("PSR interruption error set, not enabling PSR\n"); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 135c9ee55e078..ae7228032d2c3 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2613,11 +2613,21 @@ gen8_de_misc_irq_handler(struct drm_i915_private *dev_priv, u32 iir) } if (iir & GEN8_DE_EDP_PSR) { - u32 psr_iir = I915_READ(EDP_PSR_IIR); + u32 psr_iir; + i915_reg_t iir_reg; + + if (INTEL_GEN(dev_priv) >= 12) + iir_reg = TRANS_PSR_IIR(dev_priv->psr.transcoder); + else + iir_reg = EDP_PSR_IIR; + + psr_iir = I915_READ(iir_reg); + I915_WRITE(iir_reg, psr_iir); + + if (psr_iir) + found = true; intel_psr_irq_handler(dev_priv, psr_iir); - I915_WRITE(EDP_PSR_IIR, psr_iir); - found = true; } if (!found) @@ -3233,8 +3243,23 @@ static void gen11_irq_reset(struct drm_i915_private *dev_priv) intel_uncore_write(uncore, GEN11_DISPLAY_INT_CTL, 0); - intel_uncore_write(uncore, EDP_PSR_IMR, 0xffffffff); - intel_uncore_write(uncore, EDP_PSR_IIR, 0xffffffff); + if (INTEL_GEN(dev_priv) >= 12) { + enum transcoder trans; + + for (trans = TRANSCODER_A; trans <= TRANSCODER_D; trans++) { + enum intel_display_power_domain domain; + + domain = POWER_DOMAIN_TRANSCODER(trans); + if (!intel_display_power_is_enabled(dev_priv, domain)) + continue; + + intel_uncore_write(uncore, TRANS_PSR_IMR(trans), 0xffffffff); + intel_uncore_write(uncore, TRANS_PSR_IIR(trans), 0xffffffff); + } + } else { + intel_uncore_write(uncore, EDP_PSR_IMR, 0xffffffff); + intel_uncore_write(uncore, EDP_PSR_IIR, 0xffffffff); + } for_each_pipe(dev_priv, pipe) if (intel_display_power_is_enabled(dev_priv, @@ -3740,7 +3765,21 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) else if (IS_BROADWELL(dev_priv)) de_port_enables |= GEN8_PORT_DP_A_HOTPLUG; - gen3_assert_iir_is_zero(uncore, EDP_PSR_IIR); + if (INTEL_GEN(dev_priv) >= 12) { + enum transcoder trans; + + for (trans = TRANSCODER_A; trans <= TRANSCODER_D; trans++) { + enum intel_display_power_domain domain; + + domain = POWER_DOMAIN_TRANSCODER(trans); + if (!intel_display_power_is_enabled(dev_priv, domain)) + continue; + + gen3_assert_iir_is_zero(uncore, TRANS_PSR_IIR(trans)); + } + } else { + gen3_assert_iir_is_zero(uncore, EDP_PSR_IIR); + } for_each_pipe(dev_priv, pipe) { dev_priv->de_irq_mask[pipe] = ~de_pipe_masked; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 81f68455d4923..de9e679e90bb5 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4222,9 +4222,17 @@ enum { #define EDP_PSR_TP1_TIME_0us (3 << 4) #define EDP_PSR_IDLE_FRAME_SHIFT 0 -/* Bspec claims those aren't shifted but stay at 0x64800 */ +/* + * Until TGL, IMR/IIR are fixed at 0x648xx. On TGL+ those registers are relative + * to transcoder and bits defined for each one as if using no shift (i.e. as if + * it was for TRANSCODER_EDP) + */ #define EDP_PSR_IMR _MMIO(0x64834) #define EDP_PSR_IIR _MMIO(0x64838) +#define _PSR_IMR_A 0x60814 +#define _PSR_IIR_A 0x60818 +#define TRANS_PSR_IMR(tran) _MMIO_TRANS2(tran, _PSR_IMR_A) +#define TRANS_PSR_IIR(tran) _MMIO_TRANS2(tran, _PSR_IIR_A) #define _EDP_PSR_TRANS_SHIFT(trans) ((trans) == TRANSCODER_EDP ? \ 0 : ((trans) - TRANSCODER_A + 1) * 8) #define EDP_PSR_TRANS_MASK(trans) (0x7 << _EDP_PSR_TRANS_SHIFT(trans)) From e468ff06157a3bb1b70ac8ea807429446b8bf4b2 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 4 Sep 2019 14:34:16 -0700 Subject: [PATCH 102/331] drm/i915: protect access to DP_TP_* on non-dp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DP_TP_{CTL,STATUS} should only be programmed when the encoder is intel_dp. Checking its current usages intel_disable_ddi_buf() is the only offender, with other places being protected by checks like pipe_config->fec_enable that is only set by intel_dp. v3 (José): - Using intel_crtc_has_dp_encoder() instead of intel_encoder_is_dp() (Ville) Cc: Matt Roper Cc: Ville Syrjälä Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20190904213419.27547-4-jose.souza@intel.com Signed-off-by: José Roberto de Souza --- drivers/gpu/drm/i915/display/intel_ddi.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 1fe0bf01e580e..ec132cd6add88 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -3465,10 +3465,12 @@ static void intel_disable_ddi_buf(struct intel_encoder *encoder, wait = true; } - val = I915_READ(DP_TP_CTL(port)); - val &= ~(DP_TP_CTL_ENABLE | DP_TP_CTL_LINK_TRAIN_MASK); - val |= DP_TP_CTL_LINK_TRAIN_PAT1; - I915_WRITE(DP_TP_CTL(port), val); + if (intel_crtc_has_dp_encoder(crtc_state)) { + val = I915_READ(DP_TP_CTL(port)); + val &= ~(DP_TP_CTL_ENABLE | DP_TP_CTL_LINK_TRAIN_MASK); + val |= DP_TP_CTL_LINK_TRAIN_PAT1; + I915_WRITE(DP_TP_CTL(port), val); + } /* Disable FEC in DP Sink */ intel_ddi_disable_fec_state(encoder, crtc_state); From 4444df6e205bc1dc16ba112588815defe3c5a724 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 4 Sep 2019 14:34:17 -0700 Subject: [PATCH 103/331] drm/i915/tgl: move DP_TP_* to transcoder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gen 12 onwards moves the DP_TP_* registers to be transcoder-based rather than port-based. This adds the new register addresses and changes all the callers to use the register saved in intel_dp->regs.*. This is filled out when preparing to enable the port so we take into account if we should use the transcoder or the port. v2: reimplement by stashing the registers we want to access under intel_dp->reg. Now they are initialized when enabling the port. Ville suggested to store the transcoder to be used exclusively by TGL+. After implementing I thought just storing the register directly made it cleaner. Cc: Matt Roper Cc: Ville Syrjälä Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20190904213419.27547-5-jose.souza@intel.com Signed-off-by: José Roberto de Souza --- drivers/gpu/drm/i915/display/intel_ddi.c | 43 ++++++++++++------- .../drm/i915/display/intel_display_types.h | 9 ++++ drivers/gpu/drm/i915/display/intel_dp.c | 13 +++--- drivers/gpu/drm/i915/display/intel_dp_mst.c | 8 ++-- drivers/gpu/drm/i915/i915_reg.h | 4 ++ 5 files changed, 51 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index ec132cd6add88..3e6394139964d 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -3167,17 +3167,18 @@ static void intel_ddi_enable_fec(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum port port = encoder->port; + struct intel_dp *intel_dp; u32 val; if (!crtc_state->fec_enable) return; - val = I915_READ(DP_TP_CTL(port)); + intel_dp = enc_to_intel_dp(&encoder->base); + val = I915_READ(intel_dp->regs.dp_tp_ctl); val |= DP_TP_CTL_FEC_ENABLE; - I915_WRITE(DP_TP_CTL(port), val); + I915_WRITE(intel_dp->regs.dp_tp_ctl, val); - if (intel_de_wait_for_set(dev_priv, DP_TP_STATUS(port), + if (intel_de_wait_for_set(dev_priv, intel_dp->regs.dp_tp_status, DP_TP_STATUS_FEC_ENABLE_LIVE, 1)) DRM_ERROR("Timed out waiting for FEC Enable Status\n"); } @@ -3186,16 +3187,17 @@ static void intel_ddi_disable_fec_state(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum port port = encoder->port; + struct intel_dp *intel_dp; u32 val; if (!crtc_state->fec_enable) return; - val = I915_READ(DP_TP_CTL(port)); + intel_dp = enc_to_intel_dp(&encoder->base); + val = I915_READ(intel_dp->regs.dp_tp_ctl); val &= ~DP_TP_CTL_FEC_ENABLE; - I915_WRITE(DP_TP_CTL(port), val); - POSTING_READ(DP_TP_CTL(port)); + I915_WRITE(intel_dp->regs.dp_tp_ctl, val); + POSTING_READ(intel_dp->regs.dp_tp_ctl); } static void tgl_ddi_pre_enable_dp(struct intel_encoder *encoder, @@ -3208,10 +3210,14 @@ static void tgl_ddi_pre_enable_dp(struct intel_encoder *encoder, struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); bool is_mst = intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST); int level = intel_ddi_dp_level(intel_dp); + enum transcoder transcoder = crtc_state->cpu_transcoder; intel_dp_set_link_params(intel_dp, crtc_state->port_clock, crtc_state->lane_count, is_mst); + intel_dp->regs.dp_tp_ctl = TGL_DP_TP_CTL(transcoder); + intel_dp->regs.dp_tp_status = TGL_DP_TP_STATUS(transcoder); + /* 1.a got on intel_atomic_commit_tail() */ /* 2. */ @@ -3300,6 +3306,9 @@ static void hsw_ddi_pre_enable_dp(struct intel_encoder *encoder, intel_dp_set_link_params(intel_dp, crtc_state->port_clock, crtc_state->lane_count, is_mst); + intel_dp->regs.dp_tp_ctl = DP_TP_CTL(port); + intel_dp->regs.dp_tp_status = DP_TP_STATUS(port); + intel_edp_panel_on(intel_dp); intel_ddi_clk_select(encoder, crtc_state); @@ -3466,10 +3475,12 @@ static void intel_disable_ddi_buf(struct intel_encoder *encoder, } if (intel_crtc_has_dp_encoder(crtc_state)) { - val = I915_READ(DP_TP_CTL(port)); + struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + + val = I915_READ(intel_dp->regs.dp_tp_ctl); val &= ~(DP_TP_CTL_ENABLE | DP_TP_CTL_LINK_TRAIN_MASK); val |= DP_TP_CTL_LINK_TRAIN_PAT1; - I915_WRITE(DP_TP_CTL(port), val); + I915_WRITE(intel_dp->regs.dp_tp_ctl, val); } /* Disable FEC in DP Sink */ @@ -3898,7 +3909,7 @@ static void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp) u32 val; bool wait = false; - if (I915_READ(DP_TP_CTL(port)) & DP_TP_CTL_ENABLE) { + if (I915_READ(intel_dp->regs.dp_tp_ctl) & DP_TP_CTL_ENABLE) { val = I915_READ(DDI_BUF_CTL(port)); if (val & DDI_BUF_CTL_ENABLE) { val &= ~DDI_BUF_CTL_ENABLE; @@ -3906,11 +3917,11 @@ static void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp) wait = true; } - val = I915_READ(DP_TP_CTL(port)); + val = I915_READ(intel_dp->regs.dp_tp_ctl); val &= ~(DP_TP_CTL_ENABLE | DP_TP_CTL_LINK_TRAIN_MASK); val |= DP_TP_CTL_LINK_TRAIN_PAT1; - I915_WRITE(DP_TP_CTL(port), val); - POSTING_READ(DP_TP_CTL(port)); + I915_WRITE(intel_dp->regs.dp_tp_ctl, val); + POSTING_READ(intel_dp->regs.dp_tp_ctl); if (wait) intel_wait_ddi_buf_idle(dev_priv, port); @@ -3925,8 +3936,8 @@ static void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp) if (drm_dp_enhanced_frame_cap(intel_dp->dpcd)) val |= DP_TP_CTL_ENHANCED_FRAME_ENABLE; } - I915_WRITE(DP_TP_CTL(port), val); - POSTING_READ(DP_TP_CTL(port)); + I915_WRITE(intel_dp->regs.dp_tp_ctl, val); + POSTING_READ(intel_dp->regs.dp_tp_ctl); intel_dp->DP |= DDI_BUF_CTL_ENABLE; I915_WRITE(DDI_BUF_CTL(port), intel_dp->DP); diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 61277a87dbe7a..d5cc4b810d9ef 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -1218,6 +1218,15 @@ struct intel_dp { bool can_mst; /* this port supports mst */ bool is_mst; int active_mst_links; + + /* + * DP_TP_* registers may be either on port or transcoder register space. + */ + struct { + i915_reg_t dp_tp_ctl; + i915_reg_t dp_tp_status; + } regs; + /* connector directly attached - won't be use for modeset in mst world */ struct intel_connector *attached_connector; diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 4e3e696e7fc84..f06fbca5cb3ea 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -2297,6 +2297,9 @@ static void intel_dp_prepare(struct intel_encoder *encoder, intel_crtc_has_type(pipe_config, INTEL_OUTPUT_DP_MST)); + intel_dp->regs.dp_tp_ctl = DP_TP_CTL(port); + intel_dp->regs.dp_tp_status = DP_TP_STATUS(port); + /* * There are four kinds of DP registers: * @@ -3253,7 +3256,7 @@ _intel_dp_set_link_train(struct intel_dp *intel_dp, dp_train_pat & train_pat_mask); if (HAS_DDI(dev_priv)) { - u32 temp = I915_READ(DP_TP_CTL(port)); + u32 temp = I915_READ(intel_dp->regs.dp_tp_ctl); if (dp_train_pat & DP_LINK_SCRAMBLING_DISABLE) temp |= DP_TP_CTL_SCRAMBLE_DISABLE; @@ -3279,7 +3282,7 @@ _intel_dp_set_link_train(struct intel_dp *intel_dp, temp |= DP_TP_CTL_LINK_TRAIN_PAT4; break; } - I915_WRITE(DP_TP_CTL(port), temp); + I915_WRITE(intel_dp->regs.dp_tp_ctl, temp); } else if ((IS_IVYBRIDGE(dev_priv) && port == PORT_A) || (HAS_PCH_CPT(dev_priv) && port != PORT_A)) { @@ -3980,10 +3983,10 @@ void intel_dp_set_idle_link_train(struct intel_dp *intel_dp) if (!HAS_DDI(dev_priv)) return; - val = I915_READ(DP_TP_CTL(port)); + val = I915_READ(intel_dp->regs.dp_tp_ctl); val &= ~DP_TP_CTL_LINK_TRAIN_MASK; val |= DP_TP_CTL_LINK_TRAIN_IDLE; - I915_WRITE(DP_TP_CTL(port), val); + I915_WRITE(intel_dp->regs.dp_tp_ctl, val); /* * Until TGL on PORT_A we can have only eDP in SST mode. There the only @@ -3995,7 +3998,7 @@ void intel_dp_set_idle_link_train(struct intel_dp *intel_dp) if (port == PORT_A && INTEL_GEN(dev_priv) < 12) return; - if (intel_de_wait_for_set(dev_priv, DP_TP_STATUS(port), + if (intel_de_wait_for_set(dev_priv, intel_dp->regs.dp_tp_status, DP_TP_STATUS_IDLE_DONE, 1)) DRM_ERROR("Timed out waiting for DP idle patterns\n"); } diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 37366f81255b4..3f9cdbaab95e6 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -295,7 +295,6 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder, struct intel_digital_port *intel_dig_port = intel_mst->primary; struct intel_dp *intel_dp = &intel_dig_port->dp; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum port port = intel_dig_port->base.port; struct intel_connector *connector = to_intel_connector(conn_state->connector); int ret; @@ -326,8 +325,8 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder, DRM_ERROR("failed to allocate vcpi\n"); intel_dp->active_mst_links++; - temp = I915_READ(DP_TP_STATUS(port)); - I915_WRITE(DP_TP_STATUS(port), temp); + temp = I915_READ(intel_dp->regs.dp_tp_status); + I915_WRITE(intel_dp->regs.dp_tp_status, temp); ret = drm_dp_update_payload_part1(&intel_dp->mst_mgr); @@ -342,11 +341,10 @@ static void intel_mst_enable_dp(struct intel_encoder *encoder, struct intel_digital_port *intel_dig_port = intel_mst->primary; struct intel_dp *intel_dp = &intel_dig_port->dp; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum port port = intel_dig_port->base.port; DRM_DEBUG_KMS("active links %d\n", intel_dp->active_mst_links); - if (intel_de_wait_for_set(dev_priv, DP_TP_STATUS(port), + if (intel_de_wait_for_set(dev_priv, intel_dp->regs.dp_tp_status, DP_TP_STATUS_ACT_SENT, 1)) DRM_ERROR("Timed out waiting for ACT sent\n"); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index de9e679e90bb5..006cffd56be27 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -9546,7 +9546,9 @@ enum skl_power_gate { /* DisplayPort Transport Control */ #define _DP_TP_CTL_A 0x64040 #define _DP_TP_CTL_B 0x64140 +#define _TGL_DP_TP_CTL_A 0x60540 #define DP_TP_CTL(port) _MMIO_PORT(port, _DP_TP_CTL_A, _DP_TP_CTL_B) +#define TGL_DP_TP_CTL(tran) _MMIO_TRANS2((tran), _TGL_DP_TP_CTL_A) #define DP_TP_CTL_ENABLE (1 << 31) #define DP_TP_CTL_FEC_ENABLE (1 << 30) #define DP_TP_CTL_MODE_SST (0 << 27) @@ -9566,7 +9568,9 @@ enum skl_power_gate { /* DisplayPort Transport Status */ #define _DP_TP_STATUS_A 0x64044 #define _DP_TP_STATUS_B 0x64144 +#define _TGL_DP_TP_STATUS_A 0x60544 #define DP_TP_STATUS(port) _MMIO_PORT(port, _DP_TP_STATUS_A, _DP_TP_STATUS_B) +#define TGL_DP_TP_STATUS(tran) _MMIO_TRANS2((tran), _TGL_DP_TP_STATUS_A) #define DP_TP_STATUS_FEC_ENABLE_LIVE (1 << 28) #define DP_TP_STATUS_IDLE_DONE (1 << 25) #define DP_TP_STATUS_ACT_SENT (1 << 24) From 8ffa4392a32e27b3945b8c0585550d3dd831c68b Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 4 Sep 2019 14:34:18 -0700 Subject: [PATCH 104/331] drm/i915/tgl: disable SAGV temporarily MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SAGV is not currently working for Tiger Lake. We better disable it until the implementation is stabilized and we can enable it. HSDES: 1409542895 2208191909 Signed-off-by: Lucas De Marchi Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20190904213419.27547-6-jose.souza@intel.com Signed-off-by: José Roberto de Souza --- drivers/gpu/drm/i915/intel_pm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 4fa9bc83c8b4f..7294fcf053237 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3654,6 +3654,10 @@ static bool skl_needs_memory_bw_wa(struct drm_i915_private *dev_priv) static bool intel_has_sagv(struct drm_i915_private *dev_priv) { + /* HACK! */ + if (IS_GEN(dev_priv, 12)) + return false; + return (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) && dev_priv->sagv_status != I915_SAGV_NOT_CONTROLLED; } From 5b548ae63d584c76d41926e9efd63b0d2d56127f Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 4 Sep 2019 14:34:19 -0700 Subject: [PATCH 105/331] drm/i915/tgl: add gen12 to stolen initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add case for gen == 12 and add MISSING_CASE() for future gens. We were already handling gen12 as the default, so this doesn't change the current behavior. BSpec: 19481 and 44980 Cc: CQ Tang Signed-off-by: Lucas De Marchi Reviewed-by: José Roberto de Souza Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20190904213419.27547-7-jose.souza@intel.com Signed-off-by: José Roberto de Souza --- drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index aa533b4ab5f54..7ce5259d73d62 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -425,8 +425,11 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) bdw_get_stolen_reserved(dev_priv, &reserved_base, &reserved_size); break; - case 11: default: + MISSING_CASE(INTEL_GEN(dev_priv)); + /* fall-through */ + case 11: + case 12: icl_get_stolen_reserved(dev_priv, &reserved_base, &reserved_size); break; From d10e0cb7591ef88fa8edfb164327a30d323b9d3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Wed, 4 Sep 2019 16:02:40 -0700 Subject: [PATCH 106/331] drm/i915: Apply FBC WA for TGL too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit WA 1409120013 is also valid for TGL, so lets check for ">= 11". BSpec: 52890 Cc: Matt Roper Cc: Clinton Taylor Signed-off-by: José Roberto de Souza Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20190904230241.20638-1-jose.souza@intel.com --- drivers/gpu/drm/i915/display/intel_fbc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 16ed44bfd7348..dc34b23e23201 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -343,8 +343,8 @@ static void gen7_fbc_activate(struct drm_i915_private *dev_priv) HSW_FBCQ_DIS); } - if (IS_GEN(dev_priv, 11)) - /* Wa_1409120013:icl,ehl */ + if (INTEL_GEN(dev_priv) >= 11) + /* Wa_1409120013:icl,ehl,tgl */ I915_WRITE(ILK_DPFC_CHICKEN, ILK_DPFC_CHICKEN_COMP_DUMMY_PIXEL); I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN); From aaef851083ed577a4a6094dec3caf7f00f1fd0db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Wed, 4 Sep 2019 16:02:41 -0700 Subject: [PATCH 107/331] drm/i915/mst: Do not hardcoded the crtcs that encoder can connect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tiger Lake has up to 4 pipes so the mask would need to be 0xf instead of 0x7. Do not hardcode the mask so it allows the fake MST encoders to connect to all pipes no matter how many the platform has. Iterating over all pipes to keep consistent with intel_ddi_init(). Initialy this patch was replaced by commit 4eaceea3a00f ("drm/i915: Fix DP-MST crtc_mask") but userspace it not correctly using encoder.possible_crtcs and it was reverted by commit e838bfa8e170 ("Revert "drm/i915: Fix DP-MST crtc_mask"") Userspace should be fixed but it might take a while, so bringing this patch back for now. Cc: Lucas De Marchi Cc: Ville Syrjälä Signed-off-by: José Roberto de Souza Signed-off-by: Lucas De Marchi Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190904230241.20638-2-jose.souza@intel.com --- drivers/gpu/drm/i915/display/intel_dp_mst.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 3f9cdbaab95e6..eeeb3f933aa4c 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -597,6 +597,8 @@ intel_dp_create_fake_mst_encoder(struct intel_digital_port *intel_dig_port, enum struct intel_dp_mst_encoder *intel_mst; struct intel_encoder *intel_encoder; struct drm_device *dev = intel_dig_port->base.base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); + enum pipe pipe_iter; intel_mst = kzalloc(sizeof(*intel_mst), GFP_KERNEL); @@ -613,8 +615,9 @@ intel_dp_create_fake_mst_encoder(struct intel_digital_port *intel_dig_port, enum intel_encoder->type = INTEL_OUTPUT_DP_MST; intel_encoder->power_domain = intel_dig_port->base.power_domain; intel_encoder->port = intel_dig_port->base.port; - intel_encoder->crtc_mask = 0x7; intel_encoder->cloneable = 0; + for_each_pipe(dev_priv, pipe_iter) + intel_encoder->crtc_mask |= BIT(pipe_iter); intel_encoder->compute_config = intel_dp_mst_compute_config; intel_encoder->disable = intel_mst_disable_dp; From a8c15954d64ac7177559c9f5eeb6593e5883fd69 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 3 Sep 2019 07:21:33 +0100 Subject: [PATCH 108/331] drm/i915: Protect debugfs per_file_stats with RCU lock If we make sure we grab a strong reference to each object as we dump it, we can reduce the locks outside of our iterators to an rcu_read_lock. This should prevent errors like: [ 2138.371911] BUG: KASAN: use-after-free in per_file_stats+0x43/0x380 [i915] [ 2138.371924] Read of size 8 at addr ffff888223651000 by task cat/8293 [ 2138.371947] CPU: 0 PID: 8293 Comm: cat Not tainted 5.3.0-rc6-CI-Custom_4352+ #1 [ 2138.371953] Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./J4205-ITX, BIOS P1.40 07/14/2017 [ 2138.371959] Call Trace: [ 2138.371974] dump_stack+0x7c/0xbb [ 2138.372099] ? per_file_stats+0x43/0x380 [i915] [ 2138.372108] print_address_description+0x73/0x3a0 [ 2138.372231] ? per_file_stats+0x43/0x380 [i915] [ 2138.372352] ? per_file_stats+0x43/0x380 [i915] [ 2138.372362] __kasan_report+0x14e/0x192 [ 2138.372489] ? per_file_stats+0x43/0x380 [i915] [ 2138.372502] kasan_report+0xe/0x20 [ 2138.372625] per_file_stats+0x43/0x380 [i915] [ 2138.372751] ? i915_panel_show+0x110/0x110 [i915] [ 2138.372761] idr_for_each+0xa7/0x160 [ 2138.372773] ? idr_get_next_ul+0x110/0x110 [ 2138.372782] ? do_raw_spin_lock+0x10a/0x1d0 [ 2138.372923] print_context_stats+0x264/0x510 [i915] Signed-off-by: Chris Wilson Tested-by: David Weinehall Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190903062133.27360-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 9798f27a697ac..708855e051b5c 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -237,6 +237,9 @@ static int per_file_stats(int id, void *ptr, void *data) struct file_stats *stats = data; struct i915_vma *vma; + if (!kref_get_unless_zero(&obj->base.refcount)) + return 0; + stats->count++; stats->total += obj->base.size; if (!atomic_read(&obj->bind_count)) @@ -284,6 +287,7 @@ static int per_file_stats(int id, void *ptr, void *data) } spin_unlock(&obj->vma.lock); + i915_gem_object_put(obj); return 0; } @@ -313,10 +317,12 @@ static void print_context_stats(struct seq_file *m, i915_gem_context_lock_engines(ctx), it) { intel_context_lock_pinned(ce); if (intel_context_is_pinned(ce)) { + rcu_read_lock(); if (ce->state) per_file_stats(0, ce->state->obj, &kstats); per_file_stats(0, ce->ring->vma->obj, &kstats); + rcu_read_unlock(); } intel_context_unlock_pinned(ce); } @@ -328,9 +334,9 @@ static void print_context_stats(struct seq_file *m, struct task_struct *task; char name[80]; - spin_lock(&file->table_lock); + rcu_read_lock(); idr_for_each(&file->object_idr, per_file_stats, &stats); - spin_unlock(&file->table_lock); + rcu_read_unlock(); rcu_read_lock(); task = pid_task(ctx->pid ?: file->pid, PIDTYPE_PID); From ca9cab183449787058f700fd0a74a8c91b277268 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Sat, 13 Apr 2019 11:13:27 +0000 Subject: [PATCH 109/331] drm/i915: add immutable zpos plane properties MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds basic immutable support for the zpos property. The zpos increases from bottom to top: primary, sprites, cursor. Signed-off-by: Ville Syrjälä [contact@emersion.fr: adapted for latest drm-tip] Signed-off-by: Simon Ser Link: https://patchwork.freedesktop.org/patch/msgid/YSH9PasoADJJdNJCSdI4m55ankIBsCaoSgkw-NQ5dlruCAxc8J-SQwVl5n3ddSAMDLTdbdyQvkONmtbjkUU-TQk5VIu1p-aZRO1OjjuSxjY=@emersion.fr Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/display/intel_display.c | 10 ++++++++-- drivers/gpu/drm/i915/display/intel_sprite.c | 7 ++++++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 06cf2171474d8..3b5275ab66cf0 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -14887,7 +14887,7 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) const u64 *modifiers; const u32 *formats; int num_formats; - int ret; + int ret, zpos; if (INTEL_GEN(dev_priv) >= 9) return skl_universal_plane_create(dev_priv, pipe, @@ -14976,6 +14976,9 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) DRM_MODE_ROTATE_0, supported_rotations); + zpos = 0; + drm_plane_create_zpos_immutable_property(&plane->base, zpos); + drm_plane_helper_add(&plane->base, &intel_plane_helper_funcs); return plane; @@ -14992,7 +14995,7 @@ intel_cursor_plane_create(struct drm_i915_private *dev_priv, { unsigned int possible_crtcs; struct intel_plane *cursor; - int ret; + int ret, zpos; cursor = intel_plane_alloc(); if (IS_ERR(cursor)) @@ -15041,6 +15044,9 @@ intel_cursor_plane_create(struct drm_i915_private *dev_priv, DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_180); + zpos = RUNTIME_INFO(dev_priv)->num_sprites[pipe] + 1; + drm_plane_create_zpos_immutable_property(&cursor->base, zpos); + drm_plane_helper_add(&cursor->base, &intel_plane_helper_funcs); return cursor; diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index 8ca2fbc2af1df..e415b0ad4a425 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -2592,6 +2592,8 @@ skl_universal_plane_create(struct drm_i915_private *dev_priv, BIT(DRM_MODE_BLEND_PREMULTI) | BIT(DRM_MODE_BLEND_COVERAGE)); + drm_plane_create_zpos_immutable_property(&plane->base, plane_id); + drm_plane_helper_add(&plane->base, &intel_plane_helper_funcs); return plane; @@ -2613,7 +2615,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, const u64 *modifiers; const u32 *formats; int num_formats; - int ret; + int ret, zpos; if (INTEL_GEN(dev_priv) >= 9) return skl_universal_plane_create(dev_priv, pipe, @@ -2703,6 +2705,9 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, DRM_COLOR_YCBCR_BT709, DRM_COLOR_YCBCR_LIMITED_RANGE); + zpos = sprite + 1; + drm_plane_create_zpos_immutable_property(&plane->base, zpos); + drm_plane_helper_add(&plane->base, &intel_plane_helper_funcs); return plane; From 71cd86cfaa12645ca39e5bbeceb2039af74fba2e Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 5 Sep 2019 11:13:37 -0700 Subject: [PATCH 110/331] drm/i915/tgl: Use refclk/2 as bypass frequency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unlike gen11, which always ran at 50MHz when the cdclk PLL was disabled, TGL runs at refclk/2. The 50MHz croclk/2 is only used by hardware during some power state transitions. Bspec: 49201 Cc: José Roberto de Souza Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20190905181337.23727-1-matthew.d.roper@intel.com Reviewed-by: Ville Syrjälä --- drivers/gpu/drm/i915/display/intel_cdclk.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 76f11d465e91e..d3e56628af70b 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -1855,8 +1855,6 @@ static void icl_get_cdclk(struct drm_i915_private *dev_priv, u32 val; int div; - cdclk_state->bypass = 50000; - val = I915_READ(SKL_DSSM); switch (val & ICL_DSSM_CDCLK_PLL_REFCLK_MASK) { default: @@ -1873,6 +1871,11 @@ static void icl_get_cdclk(struct drm_i915_private *dev_priv, break; } + if (INTEL_GEN(dev_priv) >= 12) + cdclk_state->bypass = cdclk_state->ref / 2; + else + cdclk_state->bypass = 50000; + val = I915_READ(BXT_DE_PLL_ENABLE); if ((val & BXT_DE_PLL_PLL_ENABLE) == 0 || (val & BXT_DE_PLL_LOCK) == 0) { From cdb736fa8b8b65740030ba8f9dc87f0fc3313858 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Fri, 6 Sep 2019 16:49:57 +0300 Subject: [PATCH 111/331] drm/i915: Use engine relative LRIs on context setup Daniele pointed out that relative mmio works differently in on context restore. Instead of adding the engine mmio base to offset, it masks out the base and adds bits [12:2] to current engine base. This should allow us to construct context register state to be applicable to all instances, including virtual. And avoid the trouble of updating the registers on virtual instances when submitting work. v2: only enable for gen12 for now (Mika) v3: make enabling readable (Chris) Bspec: 20206 Cc: Chris Wilson Cc: Tvrtko Ursulin Cc: Michal Wajdeczko Cc: Joonas Lahtinen Cc: Lucas De Marchi Cc: John Harrison Suggested-by: Daniele Ceraolo Spurio Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190906134957.25909-1-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_engine_types.h | 7 +++++ drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 2 ++ drivers/gpu/drm/i915/gt/intel_lrc.c | 32 ++++++++++++++------ 3 files changed, 32 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 15e02cb58a678..943f0663837ec 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -481,6 +481,7 @@ struct intel_engine_cs { #define I915_ENGINE_HAS_SEMAPHORES BIT(3) #define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4) #define I915_ENGINE_IS_VIRTUAL BIT(5) +#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6) unsigned int flags; /* @@ -576,6 +577,12 @@ intel_engine_is_virtual(const struct intel_engine_cs *engine) return engine->flags & I915_ENGINE_IS_VIRTUAL; } +static inline bool +intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine) +{ + return engine->flags & I915_ENGINE_HAS_RELATIVE_MMIO; +} + #define instdone_has_slice(dev_priv___, sseu___, slice___) \ ((IS_GEN(dev_priv___, 7) ? 1 : ((sseu___)->slice_mask)) & BIT(slice___)) diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index 86e00a2db8a45..fbad403ab7ac0 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -132,6 +132,8 @@ * address/value pairs. Don't overdue it, though, x <= 2^4 must hold! */ #define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1) +/* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */ +#define MI_LRI_CS_MMIO (1<<19) #define MI_LRI_FORCE_POSTED (1<<12) #define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1) #define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 87b7473a6dfb1..de86ca6add09e 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1214,7 +1214,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine) unsigned int n; GEM_BUG_ON(READ_ONCE(ve->context.inflight)); - virtual_update_register_offsets(regs, engine); + + if (!intel_engine_has_relative_mmio(engine)) + virtual_update_register_offsets(regs, + engine); if (!list_empty(&ve->context.signals)) virtual_xfer_breadcrumbs(ve, engine); @@ -2939,6 +2942,9 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine) if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) engine->flags |= I915_ENGINE_HAS_PREEMPTION; } + + if (engine->class != COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) >= 12) + engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO; } static void execlists_destroy(struct intel_engine_cs *engine) @@ -3130,8 +3136,10 @@ static void execlists_init_reg_state(u32 *regs, struct intel_ring *ring) { struct i915_ppgtt *ppgtt = vm_alias(ce->vm); - bool rcs = engine->class == RENDER_CLASS; - u32 base = engine->mmio_base; + const bool rcs = engine->class == RENDER_CLASS; + const u32 base = engine->mmio_base; + const u32 lri_base = + intel_engine_has_relative_mmio(engine) ? MI_LRI_CS_MMIO : 0; /* * A context is actually a big batch buffer with several @@ -3143,8 +3151,10 @@ static void execlists_init_reg_state(u32 *regs, * * Must keep consistent with virtual_update_register_offsets(). */ - regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) | - MI_LRI_FORCE_POSTED; + regs[CTX_LRI_HEADER_0] = + MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) | + MI_LRI_FORCE_POSTED | + lri_base; CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base), _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) | @@ -3191,7 +3201,10 @@ static void execlists_init_reg_state(u32 *regs, } } - regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED; + regs[CTX_LRI_HEADER_1] = + MI_LOAD_REGISTER_IMM(9) | + MI_LRI_FORCE_POSTED | + lri_base; CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0); /* PDP values well be assigned later if needed */ @@ -3218,7 +3231,7 @@ static void execlists_init_reg_state(u32 *regs, } if (rcs) { - regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); + regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1) | lri_base; CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0); } @@ -3411,8 +3424,9 @@ static void virtual_engine_initial_hint(struct virtual_engine *ve) return; swap(ve->siblings[swp], ve->siblings[0]); - virtual_update_register_offsets(ve->context.lrc_reg_state, - ve->siblings[0]); + if (!intel_engine_has_relative_mmio(ve->siblings[0])) + virtual_update_register_offsets(ve->context.lrc_reg_state, + ve->siblings[0]); } static int virtual_context_pin(struct intel_context *ce) From 5bf05dc58d65b215437df3013163a7eea78d5d4c Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Fri, 6 Sep 2019 15:23:14 +0300 Subject: [PATCH 112/331] drm/i915/tgl: Register state context definition for Gen12 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gen12 has subtle changes in the reg state context offsets (some fields are gone, some are in a different location), compared to previous Gens. The simplest approach seems to be keeping Gen12 (and future platform) changes apart from the previous gens, while keeping the registers that are contiguous in functions we can reuse. v2: alias, virtual engine, rpcs, prune unused regs v3: use engine base (Daniele), take ctx_bb for all Bspec: 46255 Cc: Michal Wajdeczko Cc: Daniele Ceraolo Spurio Cc: Chris Wilson Cc: Joonas Lahtinen Cc: José Roberto de Souza Signed-off-by: Michel Thierry Signed-off-by: Lucas De Marchi Signed-off-by: Mika Kuoppala Reviewed-by: Daniele Ceraolo Spurio Tested-by: Daniele Ceraolo Spurio [ickle: Tweaked the GEM_WARN_ON after settling on a compromise with Daniele] Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190906122314.2146-2-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_lrc.c | 200 +++++++++++++++++------- drivers/gpu/drm/i915/gt/intel_lrc_reg.h | 6 +- 2 files changed, 147 insertions(+), 59 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index de86ca6add09e..0ddfbebbcbbc2 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -808,8 +808,12 @@ static void virtual_update_register_offsets(u32 *regs, { u32 base = engine->mmio_base; + /* Refactor so that we only have one place that knows all the offsets! */ + GEM_WARN_ON(INTEL_GEN(engine->i915) >= 12); + /* Must match execlists_init_reg_state()! */ + /* Common part */ regs[CTX_CONTEXT_CONTROL] = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(base)); regs[CTX_RING_HEAD] = i915_mmio_reg_offset(RING_HEAD(base)); @@ -820,13 +824,16 @@ static void virtual_update_register_offsets(u32 *regs, regs[CTX_BB_HEAD_U] = i915_mmio_reg_offset(RING_BBADDR_UDW(base)); regs[CTX_BB_HEAD_L] = i915_mmio_reg_offset(RING_BBADDR(base)); regs[CTX_BB_STATE] = i915_mmio_reg_offset(RING_BBSTATE(base)); + regs[CTX_SECOND_BB_HEAD_U] = i915_mmio_reg_offset(RING_SBBADDR_UDW(base)); regs[CTX_SECOND_BB_HEAD_L] = i915_mmio_reg_offset(RING_SBBADDR(base)); regs[CTX_SECOND_BB_STATE] = i915_mmio_reg_offset(RING_SBBSTATE(base)); + /* PPGTT part */ regs[CTX_CTX_TIMESTAMP] = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(base)); + regs[CTX_PDP3_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 3)); regs[CTX_PDP3_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 3)); regs[CTX_PDP2_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 2)); @@ -3122,39 +3129,13 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine) return indirect_ctx_offset; } -static struct i915_ppgtt *vm_alias(struct i915_address_space *vm) -{ - if (i915_is_ggtt(vm)) - return i915_vm_to_ggtt(vm)->alias; - else - return i915_vm_to_ppgtt(vm); -} -static void execlists_init_reg_state(u32 *regs, - struct intel_context *ce, - struct intel_engine_cs *engine, - struct intel_ring *ring) +static void init_common_reg_state(u32 * const regs, + struct i915_ppgtt * const ppgtt, + struct intel_engine_cs *engine, + struct intel_ring *ring) { - struct i915_ppgtt *ppgtt = vm_alias(ce->vm); - const bool rcs = engine->class == RENDER_CLASS; const u32 base = engine->mmio_base; - const u32 lri_base = - intel_engine_has_relative_mmio(engine) ? MI_LRI_CS_MMIO : 0; - - /* - * A context is actually a big batch buffer with several - * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The - * values we are setting here are only for the first context restore: - * on a subsequent save, the GPU will recreate this batchbuffer with new - * values (including all the missing MI_LOAD_REGISTER_IMM commands that - * we are not initializing here). - * - * Must keep consistent with virtual_update_register_offsets(). - */ - regs[CTX_LRI_HEADER_0] = - MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) | - MI_LRI_FORCE_POSTED | - lri_base; CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base), _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) | @@ -3172,41 +3153,43 @@ static void execlists_init_reg_state(u32 *regs, CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0); CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0); CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT); - CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0); - CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0); - CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0); - if (rcs) { - struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx; - - CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0); - CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET, - RING_INDIRECT_CTX_OFFSET(base), 0); - if (wa_ctx->indirect_ctx.size) { - u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); +} - regs[CTX_RCS_INDIRECT_CTX + 1] = - (ggtt_offset + wa_ctx->indirect_ctx.offset) | - (wa_ctx->indirect_ctx.size / CACHELINE_BYTES); +static void init_wa_bb_reg_state(u32 * const regs, + struct intel_engine_cs *engine, + u32 pos_bb_per_ctx) +{ + struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx; + const u32 base = engine->mmio_base; + const u32 pos_indirect_ctx = pos_bb_per_ctx + 2; + const u32 pos_indirect_ctx_offset = pos_indirect_ctx + 2; - regs[CTX_RCS_INDIRECT_CTX_OFFSET + 1] = - intel_lr_indirect_ctx_offset(engine) << 6; - } + CTX_REG(regs, pos_indirect_ctx, RING_INDIRECT_CTX(base), 0); + CTX_REG(regs, pos_indirect_ctx_offset, + RING_INDIRECT_CTX_OFFSET(base), 0); + if (wa_ctx->indirect_ctx.size) { + const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); - CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0); - if (wa_ctx->per_ctx.size) { - u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); + regs[pos_indirect_ctx + 1] = + (ggtt_offset + wa_ctx->indirect_ctx.offset) | + (wa_ctx->indirect_ctx.size / CACHELINE_BYTES); - regs[CTX_BB_PER_CTX_PTR + 1] = - (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01; - } + regs[pos_indirect_ctx_offset + 1] = + intel_lr_indirect_ctx_offset(engine) << 6; } - regs[CTX_LRI_HEADER_1] = - MI_LOAD_REGISTER_IMM(9) | - MI_LRI_FORCE_POSTED | - lri_base; + CTX_REG(regs, pos_bb_per_ctx, RING_BB_PER_CTX_PTR(base), 0); + if (wa_ctx->per_ctx.size) { + const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); - CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0); + regs[pos_bb_per_ctx + 1] = + (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01; + } +} + +static void init_ppgtt_reg_state(u32 *regs, u32 base, + struct i915_ppgtt *ppgtt) +{ /* PDP values well be assigned later if needed */ CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0); CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0); @@ -3229,6 +3212,47 @@ static void execlists_init_reg_state(u32 *regs, ASSIGN_CTX_PDP(ppgtt, regs, 1); ASSIGN_CTX_PDP(ppgtt, regs, 0); } +} + +static struct i915_ppgtt *vm_alias(struct i915_address_space *vm) +{ + if (i915_is_ggtt(vm)) + return i915_vm_to_ggtt(vm)->alias; + else + return i915_vm_to_ppgtt(vm); +} + +static void gen8_init_reg_state(u32 * const regs, + struct intel_context *ce, + struct intel_engine_cs *engine, + struct intel_ring *ring) +{ + struct i915_ppgtt * const ppgtt = vm_alias(ce->vm); + const bool rcs = engine->class == RENDER_CLASS; + const u32 base = engine->mmio_base; + const u32 lri_base = + intel_engine_has_relative_mmio(engine) ? MI_LRI_CS_MMIO : 0; + + regs[CTX_LRI_HEADER_0] = + MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) | + MI_LRI_FORCE_POSTED | + lri_base; + + init_common_reg_state(regs, ppgtt, engine, ring); + CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0); + CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0); + CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0); + if (rcs) + init_wa_bb_reg_state(regs, engine, CTX_BB_PER_CTX_PTR); + + regs[CTX_LRI_HEADER_1] = + MI_LOAD_REGISTER_IMM(9) | + MI_LRI_FORCE_POSTED | + lri_base; + + CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0); + + init_ppgtt_reg_state(regs, base, ppgtt); if (rcs) { regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1) | lri_base; @@ -3240,6 +3264,66 @@ static void execlists_init_reg_state(u32 *regs, regs[CTX_END] |= BIT(0); } +static void gen12_init_reg_state(u32 * const regs, + struct intel_context *ce, + struct intel_engine_cs *engine, + struct intel_ring *ring) +{ + struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(ce->vm); + const bool rcs = engine->class == RENDER_CLASS; + const u32 base = engine->mmio_base; + const u32 lri_base = + intel_engine_has_relative_mmio(engine) ? MI_LRI_CS_MMIO : 0; + + regs[CTX_LRI_HEADER_0] = + MI_LOAD_REGISTER_IMM(rcs ? 11 : 9) | + MI_LRI_FORCE_POSTED | + lri_base; + + init_common_reg_state(regs, ppgtt, engine, ring); + + /* We want ctx_ptr for all engines to be set */ + init_wa_bb_reg_state(regs, engine, GEN12_CTX_BB_PER_CTX_PTR); + + regs[CTX_LRI_HEADER_1] = + MI_LOAD_REGISTER_IMM(9) | + MI_LRI_FORCE_POSTED | + lri_base; + + CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0); + + init_ppgtt_reg_state(regs, base, ppgtt); + + if (rcs) { + regs[GEN12_CTX_LRI_HEADER_3] = + MI_LOAD_REGISTER_IMM(1) | lri_base; + CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0); + + /* TODO: oa_init_reg_state ? */ + } +} + +static void execlists_init_reg_state(u32 *regs, + struct intel_context *ce, + struct intel_engine_cs *engine, + struct intel_ring *ring) +{ + /* + * A context is actually a big batch buffer with several + * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The + * values we are setting here are only for the first context restore: + * on a subsequent save, the GPU will recreate this batchbuffer with new + * values (including all the missing MI_LOAD_REGISTER_IMM commands that + * we are not initializing here). + * + * Must keep consistent with virtual_update_register_offsets(). + */ + if (INTEL_GEN(engine->i915) >= 12) + gen12_init_reg_state(regs, ce, engine, ring); + else + gen8_init_reg_state(regs, ce, engine, ring); +} + static int populate_lr_context(struct intel_context *ce, struct drm_i915_gem_object *ctx_obj, diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h index b8f20ad711692..68caf85418660 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h @@ -9,7 +9,7 @@ #include -/* GEN8+ Reg State Context */ +/* GEN8 to GEN11 Reg State Context */ #define CTX_LRI_HEADER_0 0x01 #define CTX_CONTEXT_CONTROL 0x02 #define CTX_RING_HEAD 0x04 @@ -39,6 +39,10 @@ #define CTX_R_PWR_CLK_STATE 0x42 #define CTX_END 0x44 +/* GEN12+ Reg State Context */ +#define GEN12_CTX_BB_PER_CTX_PTR 0x12 +#define GEN12_CTX_LRI_HEADER_3 0x41 + #define CTX_REG(reg_state, pos, reg, val) do { \ u32 *reg_state__ = (reg_state); \ const u32 pos__ = (pos); \ From 42014f69bb235f1123a6f8befb58b3e350a6f180 Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Thu, 5 Sep 2019 14:14:03 +0300 Subject: [PATCH 113/331] drm/i915: Hook up GT power management Refactor the GT power management interface to work through the GT now that it is under the control of gt/ Based on a patch by Chris Wilson. Signed-off-by: Andi Shyti Cc: Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190905111403.10071-1-andi.shyti@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_pm.c | 1 + drivers/gpu/drm/i915/gt/intel_gt.c | 45 ++++++++++++++++++- drivers/gpu/drm/i915/gt/intel_gt.h | 9 ++-- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 36 +++++++++++++++ drivers/gpu/drm/i915/gt/intel_gt_pm.h | 2 + drivers/gpu/drm/i915/i915_drv.c | 21 ++------- drivers/gpu/drm/i915/i915_gem.c | 30 ++----------- drivers/gpu/drm/i915/intel_pm.c | 4 +- .../gpu/drm/i915/selftests/mock_gem_device.c | 2 +- 9 files changed, 100 insertions(+), 50 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 92e53c25424c5..b3993d24b83db 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -137,6 +137,7 @@ static bool switch_to_kernel_context_sync(struct intel_gt *gt) bool i915_gem_load_power_context(struct drm_i915_private *i915) { + intel_gt_pm_enable(&i915->gt); return switch_to_kernel_context_sync(&i915->gt); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index d48ec9a76ed16..e2cc697d27fbc 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -7,6 +7,7 @@ #include "intel_gt.h" #include "intel_gt_pm.h" #include "intel_uncore.h" +#include "intel_pm.h" void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) { @@ -27,6 +28,9 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) void intel_gt_init_hw(struct drm_i915_private *i915) { i915->gt.ggtt = &i915->ggtt; + + /* BIOS often leaves RC6 enabled, but disable it for hw init */ + intel_gt_pm_disable(&i915->gt); } static void rmw_set(struct intel_uncore *uncore, i915_reg_t reg, u32 set) @@ -222,7 +226,13 @@ void intel_gt_chipset_flush(struct intel_gt *gt) intel_gtt_chipset_flush(); } -int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size) +void intel_gt_driver_register(struct intel_gt *gt) +{ + if (IS_GEN(gt->i915, 5)) + intel_gpu_ips_init(gt->i915); +} + +static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size) { struct drm_i915_private *i915 = gt->i915; struct drm_i915_gem_object *obj; @@ -256,11 +266,42 @@ int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size) return ret; } -void intel_gt_fini_scratch(struct intel_gt *gt) +static void intel_gt_fini_scratch(struct intel_gt *gt) { i915_vma_unpin_and_release(>->scratch, 0); } +int intel_gt_init(struct intel_gt *gt) +{ + int err; + + err = intel_gt_init_scratch(gt, IS_GEN(gt->i915, 2) ? SZ_256K : SZ_4K); + if (err) + return err; + + return 0; +} + +void intel_gt_driver_remove(struct intel_gt *gt) +{ + GEM_BUG_ON(gt->awake); + intel_gt_pm_disable(gt); +} + +void intel_gt_driver_unregister(struct intel_gt *gt) +{ + intel_gpu_ips_teardown(); +} + +void intel_gt_driver_release(struct intel_gt *gt) +{ + /* Paranoia: make sure we have disabled everything before we exit. */ + intel_gt_pm_disable(gt); + + intel_cleanup_gt_powersave(gt->i915); + intel_gt_fini_scratch(gt); +} + void intel_gt_driver_late_release(struct intel_gt *gt) { intel_uc_driver_late_release(>->uc); diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 4920cb351f109..17af21cb7ed35 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -29,6 +29,12 @@ static inline struct intel_gt *huc_to_gt(struct intel_huc *huc) void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915); void intel_gt_init_hw(struct drm_i915_private *i915); +int intel_gt_init(struct intel_gt *gt); +void intel_gt_driver_register(struct intel_gt *gt); + +void intel_gt_driver_unregister(struct intel_gt *gt); +void intel_gt_driver_remove(struct intel_gt *gt); +void intel_gt_driver_release(struct intel_gt *gt); void intel_gt_driver_late_release(struct intel_gt *gt); @@ -41,9 +47,6 @@ void intel_gt_chipset_flush(struct intel_gt *gt); void intel_gt_init_hangcheck(struct intel_gt *gt); -int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size); -void intel_gt_fini_scratch(struct intel_gt *gt); - static inline u32 intel_gt_scratch_offset(const struct intel_gt *gt, enum intel_gt_scratch_field field) { diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index aa6cf0152ce74..6ba0d2069f875 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -124,6 +124,42 @@ void intel_gt_sanitize(struct intel_gt *gt, bool force) __intel_engine_reset(engine, false); } +static bool is_mock_device(const struct intel_gt *gt) +{ + return I915_SELFTEST_ONLY(gt->awake == -1); +} + +void intel_gt_pm_enable(struct intel_gt *gt) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* Powersaving is controlled by the host when inside a VM */ + if (intel_vgpu_active(gt->i915)) + return; + + if (is_mock_device(gt)) + return; + + intel_gt_pm_get(gt); + + for_each_engine(engine, gt->i915, id) { + intel_engine_pm_get(engine); + engine->serial++; /* force kernel context reload */ + intel_engine_pm_put(engine); + } + + intel_gt_pm_put(gt); +} + +void intel_gt_pm_disable(struct intel_gt *gt) +{ + if (is_mock_device(gt)) + return; + + intel_sanitize_gt_powersave(gt->i915); +} + int intel_gt_resume(struct intel_gt *gt) { struct intel_engine_cs *engine; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index fb39d99cd6eef..d1f3e2e239374 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -43,6 +43,8 @@ static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt) } void intel_gt_pm_init_early(struct intel_gt *gt); +void intel_gt_pm_enable(struct intel_gt *gt); +void intel_gt_pm_disable(struct intel_gt *gt); void intel_gt_sanitize(struct intel_gt *gt, bool force); int intel_gt_resume(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 9478f704a315e..63e52e1534143 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1316,9 +1316,6 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) pm_qos_add_request(&dev_priv->pm_qos, PM_QOS_CPU_DMA_LATENCY, PM_QOS_DEFAULT_VALUE); - /* BIOS often leaves RC6 enabled, but disable it for hw init */ - intel_sanitize_gt_powersave(dev_priv); - intel_gt_init_workarounds(dev_priv); /* On the 945G/GM, the chipset reports the MSI capability on the @@ -1424,8 +1421,7 @@ static void i915_driver_register(struct drm_i915_private *dev_priv) acpi_video_register(); } - if (IS_GEN(dev_priv, 5)) - intel_gpu_ips_init(dev_priv); + intel_gt_driver_register(&dev_priv->gt); intel_audio_init(dev_priv); @@ -1468,7 +1464,7 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv) */ drm_kms_helper_poll_fini(&dev_priv->drm); - intel_gpu_ips_teardown(); + intel_gt_driver_unregister(&dev_priv->gt); acpi_video_unregister(); intel_opregion_unregister(dev_priv); @@ -1612,9 +1608,6 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) out_cleanup_hw: i915_driver_hw_remove(dev_priv); i915_ggtt_driver_release(dev_priv); - - /* Paranoia: make sure we have disabled everything before we exit. */ - intel_sanitize_gt_powersave(dev_priv); out_cleanup_mmio: i915_driver_mmio_release(dev_priv); out_runtime_pm_put: @@ -1685,9 +1678,6 @@ static void i915_driver_release(struct drm_device *dev) i915_ggtt_driver_release(dev_priv); - /* Paranoia: make sure we have disabled everything before we exit. */ - intel_sanitize_gt_powersave(dev_priv); - i915_driver_mmio_release(dev_priv); enable_rpm_wakeref_asserts(rpm); @@ -1916,7 +1906,7 @@ static int i915_drm_resume(struct drm_device *dev) int ret; disable_rpm_wakeref_asserts(&dev_priv->runtime_pm); - intel_sanitize_gt_powersave(dev_priv); + intel_gt_pm_disable(&dev_priv->gt); i915_gem_sanitize(dev_priv); @@ -2044,7 +2034,7 @@ static int i915_drm_resume_early(struct drm_device *dev) intel_display_power_resume_early(dev_priv); - intel_sanitize_gt_powersave(dev_priv); + intel_gt_pm_disable(&dev_priv->gt); intel_power_domains_resume(dev_priv); @@ -2588,9 +2578,6 @@ static int intel_runtime_suspend(struct device *kdev) struct intel_runtime_pm *rpm = &dev_priv->runtime_pm; int ret = 0; - if (WARN_ON_ONCE(!(dev_priv->gt_pm.rc6.enabled && HAS_RC6(dev_priv)))) - return -ENODEV; - if (WARN_ON_ONCE(!HAS_RUNTIME_PM(dev_priv))) return -ENODEV; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 95e7c52cf8edc..141024c66f36b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1375,17 +1375,6 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) return err; } -static int -i915_gem_init_scratch(struct drm_i915_private *i915, unsigned int size) -{ - return intel_gt_init_scratch(&i915->gt, size); -} - -static void i915_gem_fini_scratch(struct drm_i915_private *i915) -{ - intel_gt_fini_scratch(&i915->gt); -} - static int intel_engines_verify_workarounds(struct drm_i915_private *i915) { struct intel_engine_cs *engine; @@ -1436,12 +1425,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) goto err_unlock; } - ret = i915_gem_init_scratch(dev_priv, - IS_GEN(dev_priv, 2) ? SZ_256K : PAGE_SIZE); - if (ret) { - GEM_BUG_ON(ret == -EIO); - goto err_ggtt; - } + intel_gt_init(&dev_priv->gt); ret = intel_engines_setup(dev_priv); if (ret) { @@ -1527,15 +1511,13 @@ int i915_gem_init(struct drm_i915_private *dev_priv) err_uc_init: if (ret != -EIO) { intel_uc_fini(&dev_priv->gt.uc); - intel_cleanup_gt_powersave(dev_priv); intel_engines_cleanup(dev_priv); } err_context: if (ret != -EIO) i915_gem_contexts_fini(dev_priv); err_scratch: - i915_gem_fini_scratch(dev_priv); -err_ggtt: + intel_gt_driver_release(&dev_priv->gt); err_unlock: intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); mutex_unlock(&dev_priv->drm.struct_mutex); @@ -1587,12 +1569,10 @@ void i915_gem_driver_unregister(struct drm_i915_private *i915) void i915_gem_driver_remove(struct drm_i915_private *dev_priv) { - GEM_BUG_ON(dev_priv->gt.awake); - intel_wakeref_auto_fini(&dev_priv->ggtt.userfault_wakeref); i915_gem_suspend_late(dev_priv); - intel_disable_gt_powersave(dev_priv); + intel_gt_driver_remove(&dev_priv->gt); /* Flush any outstanding unpin_work. */ i915_gem_drain_workqueue(dev_priv); @@ -1610,13 +1590,11 @@ void i915_gem_driver_release(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->drm.struct_mutex); intel_engines_cleanup(dev_priv); i915_gem_contexts_fini(dev_priv); - i915_gem_fini_scratch(dev_priv); + intel_gt_driver_release(&dev_priv->gt); mutex_unlock(&dev_priv->drm.struct_mutex); intel_wa_list_free(&dev_priv->gt_wa_list); - intel_cleanup_gt_powersave(dev_priv); - intel_uc_cleanup_firmwares(&dev_priv->gt.uc); i915_gem_cleanup_userptr(dev_priv); intel_timelines_fini(dev_priv); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 7294fcf053237..528e90ed5de40 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -8671,7 +8671,9 @@ void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) { mutex_lock(&dev_priv->gt_pm.rps.lock); - intel_disable_rc6(dev_priv); + if (HAS_RC6(dev_priv)) + intel_disable_rc6(dev_priv); + intel_disable_rps(dev_priv); if (HAS_LLC(dev_priv)) intel_disable_llc_pstate(dev_priv); diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 01a89c071bf5c..91f15fa728cd0 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -192,7 +192,7 @@ struct drm_i915_private *mock_gem_device(void) INIT_DELAYED_WORK(&i915->gem.retire_work, mock_retire_work_handler); INIT_WORK(&i915->gem.idle_work, mock_idle_work_handler); - i915->gt.awake = true; + i915->gt.awake = -1; intel_timelines_init(i915); From 5d7f965e5675ae0cbec5fe56af79da9039f723ed Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 5 Sep 2019 10:29:21 +0300 Subject: [PATCH 114/331] drm/i915/buddy: add missing call to i915_global_register We are meant to register the kmem cache at init, such the supplied exit and shrink hooks can be called. Signed-off-by: Matthew Auld Reviewed-by: Mika Kuoppala Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190905072921.7979-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/i915_buddy.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_buddy.c b/drivers/gpu/drm/i915/i915_buddy.c index fe1871d7c1260..e9d4200ce3bc6 100644 --- a/drivers/gpu/drm/i915/i915_buddy.c +++ b/drivers/gpu/drm/i915/i915_buddy.c @@ -38,6 +38,7 @@ int __init i915_global_buddy_init(void) if (!global.slab_blocks) return -ENOMEM; + i915_global_register(&global.base); return 0; } From d810583fc2fcf139cc766eb2303500b2d9cf064d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 7 Sep 2019 11:50:46 +0100 Subject: [PATCH 115/331] drm/i915/execlists: Remove incorrect BUG_ON for schedule-out As we may unwind incomplete requests (for preemption) prior to processing the CSB and the schedule-out events, we may update rq->engine (resetting it to point back to the parent virtual engine) prior to calling execlists_schedule_out(), invalidating the assertion that the request still points to the inflight engine. (The likelihood of this is increased if the CSB interrupt processing is pushed to the ksoftirqd for being too slow and direct submission overtakes it.) Tvrtko summarised it as: "So unwind from direct submission resets rq->engine and races with process_csb from the tasklet which notices request has actually completed." Reported-by: Vinay Belgaumkar Fixes: df403069029d ("drm/i915/execlists: Lift process_csb() out of the irq-off spinlock") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Cc: Vinay Belgaumkar Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190907105046.19934-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 0ddfbebbcbbc2..f073aea6a1fe2 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -631,7 +631,6 @@ execlists_schedule_out(struct i915_request *rq) struct intel_engine_cs *cur, *old; trace_i915_request_out(rq); - GEM_BUG_ON(intel_context_inflight(ce) != rq->engine); old = READ_ONCE(ce->inflight); do From 1e0a96e508827ad88229bff32ea2641c83fc52a8 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 9 Sep 2019 13:40:50 +0100 Subject: [PATCH 116/331] drm/i915: export color_differs Export color_differs so that we can use it elsewhere. Signed-off-by: Matthew Auld Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Rodrigo Vivi Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190909124052.22900-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +- drivers/gpu/drm/i915/i915_vma.c | 11 ++++------- drivers/gpu/drm/i915/i915_vma.h | 6 ++++++ 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 906dc6fff3832..095f5e358a583 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2552,7 +2552,7 @@ static void i915_gtt_color_adjust(const struct drm_mm_node *node, u64 *start, u64 *end) { - if (node->allocated && node->color != color) + if (i915_node_color_differs(node, color)) *start += I915_GTT_PAGE_SIZE; /* Also leave a space between the unallocated reserved node after the diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index e0e677b2a3a94..a90bd2678353a 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -477,11 +477,6 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) vma->flags &= ~I915_VMA_CAN_FENCE; } -static bool color_differs(struct drm_mm_node *node, unsigned long color) -{ - return node->allocated && node->color != color; -} - bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long cache_level) { struct drm_mm_node *node = &vma->node; @@ -502,11 +497,13 @@ bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long cache_level) GEM_BUG_ON(list_empty(&node->node_list)); other = list_prev_entry(node, node_list); - if (color_differs(other, cache_level) && !drm_mm_hole_follows(other)) + if (i915_node_color_differs(other, cache_level) && + !drm_mm_hole_follows(other)) return false; other = list_next_entry(node, node_list); - if (color_differs(other, cache_level) && !drm_mm_hole_follows(node)) + if (i915_node_color_differs(other, cache_level) && + !drm_mm_hole_follows(node)) return false; return true; diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 889fc7cb910ac..5b1e0cf7669d6 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -373,6 +373,12 @@ static inline bool i915_vma_is_bound(const struct i915_vma *vma, return vma->flags & where; } +static inline bool i915_node_color_differs(const struct drm_mm_node *node, + unsigned long color) +{ + return node->allocated && node->color != color; +} + /** * i915_vma_pin_iomap - calls ioremap_wc to map the GGTT VMA via the aperture * @vma: VMA to iomap From e9ceb751ad4ee69591bcd33b2a220738855395ea Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 9 Sep 2019 13:40:51 +0100 Subject: [PATCH 117/331] drm/i915: s/i915_gtt_color_adjust/i915_ggtt_color_adjust Make it clear that the color adjust callback applies to the ggtt. Signed-off-by: Matthew Auld Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Rodrigo Vivi Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190909124052.22900-2-matthew.auld@intel.com --- drivers/gpu/drm/i915/i915_gem_gtt.c | 10 +++++----- drivers/gpu/drm/i915/selftests/i915_gem_evict.c | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 095f5e358a583..48688d683e951 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2547,10 +2547,10 @@ static int ggtt_set_pages(struct i915_vma *vma) return 0; } -static void i915_gtt_color_adjust(const struct drm_mm_node *node, - unsigned long color, - u64 *start, - u64 *end) +static void i915_ggtt_color_adjust(const struct drm_mm_node *node, + unsigned long color, + u64 *start, + u64 *end) { if (i915_node_color_differs(node, color)) *start += I915_GTT_PAGE_SIZE; @@ -3206,7 +3206,7 @@ static int ggtt_init_hw(struct i915_ggtt *ggtt) ggtt->vm.has_read_only = IS_VALLEYVIEW(i915); if (!HAS_LLC(i915) && !HAS_PPGTT(i915)) - ggtt->vm.mm.color_adjust = i915_gtt_color_adjust; + ggtt->vm.mm.color_adjust = i915_ggtt_color_adjust; if (!io_mapping_init_wc(&ggtt->iomap, ggtt->gmadr.start, diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index cb30c669b1b79..fca38167bdce4 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -276,7 +276,7 @@ static int igt_evict_for_cache_color(void *arg) /* Currently the use of color_adjust is limited to cache domains within * the ggtt, and so the presence of mm.color_adjust is assumed to be - * i915_gtt_color_adjust throughout our driver, so using a mock color + * i915_ggtt_color_adjust throughout our driver, so using a mock color * adjust will work just fine for our purposes. */ ggtt->vm.mm.color_adjust = mock_color_adjust; From 33dd8899231372a438bf7d96afa1dbe13a5e17c7 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 9 Sep 2019 13:40:52 +0100 Subject: [PATCH 118/331] drm/i915: cleanup cache-coloring Try to tidy up the cache-coloring such that we rid the code of any mm.color_adjust assumptions, this should hopefully make it more obvious in the code when we need to actually use the cache-level as the color, and as a bonus should make adding a different color-scheme simpler. Signed-off-by: Matthew Auld Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Rodrigo Vivi Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190909124052.22900-3-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_domain.c | 6 +++-- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem_evict.c | 12 +++++----- drivers/gpu/drm/i915/i915_gem_gtt.h | 6 +++++ drivers/gpu/drm/i915/i915_vma.c | 22 +++++++++---------- drivers/gpu/drm/i915/i915_vma.h | 2 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 10 +++++---- 7 files changed, 34 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 6af740a5e3db1..da3e7cf12aa17 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -294,8 +294,10 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, } } - list_for_each_entry(vma, &obj->vma.list, obj_link) - vma->node.color = cache_level; + list_for_each_entry(vma, &obj->vma.list, obj_link) { + if (i915_vm_has_cache_coloring(vma->vm)) + vma->node.color = cache_level; + } i915_gem_object_set_cache_coherency(obj, cache_level); obj->cache_dirty = true; /* Always invalidate stale cachelines */ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index db7480831e52b..e289b4ffd34bb 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2364,7 +2364,7 @@ i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id) /* i915_gem_evict.c */ int __must_check i915_gem_evict_something(struct i915_address_space *vm, u64 min_size, u64 alignment, - unsigned cache_level, + unsigned long color, u64 start, u64 end, unsigned flags); int __must_check i915_gem_evict_for_node(struct i915_address_space *vm, diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 52c86c6e06739..e76c9da9992db 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -70,7 +70,7 @@ mark_free(struct drm_mm_scan *scan, * @vm: address space to evict from * @min_size: size of the desired free space * @alignment: alignment constraint of the desired free space - * @cache_level: cache_level for the desired space + * @color: color for the desired space * @start: start (inclusive) of the range from which to evict objects * @end: end (exclusive) of the range from which to evict objects * @flags: additional flags to control the eviction algorithm @@ -91,7 +91,7 @@ mark_free(struct drm_mm_scan *scan, int i915_gem_evict_something(struct i915_address_space *vm, u64 min_size, u64 alignment, - unsigned cache_level, + unsigned long color, u64 start, u64 end, unsigned flags) { @@ -124,7 +124,7 @@ i915_gem_evict_something(struct i915_address_space *vm, if (flags & PIN_MAPPABLE) mode = DRM_MM_INSERT_LOW; drm_mm_scan_init_with_range(&scan, &vm->mm, - min_size, alignment, cache_level, + min_size, alignment, color, start, end, mode); /* @@ -266,7 +266,6 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, u64 start = target->start; u64 end = start + target->size; struct i915_vma *vma, *next; - bool check_color; int ret = 0; lockdep_assert_held(&vm->i915->drm.struct_mutex); @@ -283,8 +282,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, if (!(flags & PIN_NONBLOCK)) i915_retire_requests(vm->i915); - check_color = vm->mm.color_adjust; - if (check_color) { + if (i915_vm_has_cache_coloring(vm)) { /* Expand search to cover neighbouring guard pages (or lack!) */ if (start) start -= I915_GTT_PAGE_SIZE; @@ -310,7 +308,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, * abutt and conflict. If they are in conflict, then we evict * those as well to make room for our guard pages. */ - if (check_color) { + if (i915_vm_has_cache_coloring(vm)) { if (node->start + node->size == target->start) { if (node->color == target->color) continue; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 07c85c134d4c4..201788126a893 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -376,6 +376,12 @@ i915_vm_has_scratch_64K(struct i915_address_space *vm) return vm->scratch_order == get_order(I915_GTT_PAGE_SIZE_64K); } +static inline bool +i915_vm_has_cache_coloring(struct i915_address_space *vm) +{ + return i915_is_ggtt(vm) && vm->mm.color_adjust; +} + /* The Graphics Translation Table is the way in which GEN hardware translates a * Graphics Virtual Address into a Physical Address. In addition to the normal * collateral associated with any va->pa translations GEN hardware also has a diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index a90bd2678353a..68d9f9b4d050e 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -477,7 +477,7 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) vma->flags &= ~I915_VMA_CAN_FENCE; } -bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long cache_level) +bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color) { struct drm_mm_node *node = &vma->node; struct drm_mm_node *other; @@ -489,7 +489,7 @@ bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long cache_level) * these constraints apply and set the drm_mm.color_adjust * appropriately. */ - if (vma->vm->mm.color_adjust == NULL) + if (!i915_vm_has_cache_coloring(vma->vm)) return true; /* Only valid to be called on an already inserted vma */ @@ -497,12 +497,12 @@ bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long cache_level) GEM_BUG_ON(list_empty(&node->node_list)); other = list_prev_entry(node, node_list); - if (i915_node_color_differs(other, cache_level) && + if (i915_node_color_differs(other, color) && !drm_mm_hole_follows(other)) return false; other = list_next_entry(node, node_list); - if (i915_node_color_differs(other, cache_level) && + if (i915_node_color_differs(other, color) && !drm_mm_hole_follows(node)) return false; @@ -539,7 +539,7 @@ static int i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) { struct drm_i915_private *dev_priv = vma->vm->i915; - unsigned int cache_level; + unsigned long color; u64 start, end; int ret; @@ -580,14 +580,14 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) return -ENOSPC; } + color = 0; if (vma->obj) { ret = i915_gem_object_pin_pages(vma->obj); if (ret) return ret; - cache_level = vma->obj->cache_level; - } else { - cache_level = 0; + if (i915_vm_has_cache_coloring(vma->vm)) + color = vma->obj->cache_level; } GEM_BUG_ON(vma->pages); @@ -605,7 +605,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) } ret = i915_gem_gtt_reserve(vma->vm, &vma->node, - size, offset, cache_level, + size, offset, color, flags); if (ret) goto err_clear; @@ -644,7 +644,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) } ret = i915_gem_gtt_insert(vma->vm, &vma->node, - size, alignment, cache_level, + size, alignment, color, start, end, flags); if (ret) goto err_clear; @@ -653,7 +653,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) GEM_BUG_ON(vma->node.start + vma->node.size > end); } GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); - GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, cache_level)); + GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, color)); mutex_lock(&vma->vm->mutex); list_move_tail(&vma->vm_link, &vma->vm->bound_list); diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 5b1e0cf7669d6..425bda4db2c26 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -295,7 +295,7 @@ i915_vma_compare(struct i915_vma *vma, int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags); -bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long cache_level); +bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color); bool i915_vma_misplaced(const struct i915_vma *vma, u64 size, u64 alignment, u64 flags); void __i915_vma_set_map_and_fenceable(struct i915_vma *vma); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index fca38167bdce4..2905fb21d8663 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -274,12 +274,14 @@ static int igt_evict_for_cache_color(void *arg) LIST_HEAD(objects); int err; - /* Currently the use of color_adjust is limited to cache domains within - * the ggtt, and so the presence of mm.color_adjust is assumed to be - * i915_ggtt_color_adjust throughout our driver, so using a mock color - * adjust will work just fine for our purposes. + /* + * Currently the use of color_adjust for the GGTT is limited to cache + * coloring and guard pages, and so the presence of mm.color_adjust for + * the GGTT is assumed to be i915_ggtt_color_adjust, hence using a mock + * color adjust will work just fine for our purposes. */ ggtt->vm.mm.color_adjust = mock_color_adjust; + GEM_BUG_ON(!i915_vm_has_cache_coloring(&ggtt->vm)); obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE); if (IS_ERR(obj)) { From fd521d3b0ed2eb8b8c67c701795ecb08b9c3f544 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 9 Sep 2019 18:16:46 +0100 Subject: [PATCH 119/331] drm/i915: include GTT page-size info in error state It might prove useful in the future to know if the vma is utilising huge-GTT-pages. Related to this is the GTT cache, where there is some HW "quirkiness" where it must be disabled if using 2M pages, so include that for good measure. Suggested-by: Chris Wilson Signed-off-by: Matthew Auld Cc: Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190909171646.22090-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 1 - drivers/gpu/drm/i915/i915_gpu_error.c | 10 ++++++++++ drivers/gpu/drm/i915/i915_gpu_error.h | 2 ++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 13b9dc0e1a899..a558edf15ec80 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -160,7 +160,6 @@ struct drm_i915_gem_object { struct sg_table *pages; void *mapping; - /* TODO: whack some of this into the error state */ struct i915_page_sizes { /** * The sg mask of the pages sg_table. i.e the mask of diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 3ccf7fd9307f3..6384a06aa5bfb 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -575,6 +575,9 @@ static void print_error_obj(struct drm_i915_error_state_buf *m, lower_32_bits(obj->gtt_offset)); } + if (obj->gtt_page_sizes > I915_GTT_PAGE_SIZE_4K) + err_printf(m, "gtt_page_sizes = 0x%08x\n", obj->gtt_page_sizes); + err_compression_marker(m); for (page = 0; page < obj->page_count; page++) { int i, len; @@ -735,6 +738,9 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, if (IS_GEN(m->i915, 7)) err_printf(m, "ERR_INT: 0x%08x\n", error->err_int); + if (IS_GEN_RANGE(m->i915, 8, 11)) + err_printf(m, "GTT_CACHE_EN: 0x%08x\n", error->gtt_cache); + for (ee = error->engine; ee; ee = ee->next) error_print_engine(m, ee, error->epoch); @@ -985,6 +991,7 @@ i915_error_object_create(struct drm_i915_private *i915, dst->gtt_offset = vma->node.start; dst->gtt_size = vma->node.size; + dst->gtt_page_sizes = vma->page_sizes.gtt; dst->num_pages = num_pages; dst->page_count = 0; dst->unused = 0; @@ -1554,6 +1561,9 @@ static void capture_reg_state(struct i915_gpu_state *error) error->gac_eco = intel_uncore_read(uncore, GAC_ECO_BITS); } + if (IS_GEN_RANGE(i915, 8, 11)) + error->gtt_cache = intel_uncore_read(uncore, HSW_GTT_CACHE_EN); + /* 4: Everything else */ if (INTEL_GEN(i915) >= 11) { error->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index df9f577666262..63cf387411e02 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -74,6 +74,7 @@ struct i915_gpu_state { u32 gam_ecochk; u32 gab_ctl; u32 gfx_mode; + u32 gtt_cache; u32 nfence; u64 fence[I915_MAX_NUM_FENCES]; @@ -127,6 +128,7 @@ struct i915_gpu_state { struct drm_i915_error_object { u64 gtt_offset; u64 gtt_size; + u32 gtt_page_sizes; int num_pages; int page_count; int unused; From fa9a09f15065650d97e3b1336d11e4ad9672b759 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 10 Sep 2019 09:02:08 +0100 Subject: [PATCH 120/331] drm/i915/execlists: Clear STOP_RING bit on reset During reset, we try to ensure no forward progress of the CS prior to the reset by setting the STOP_RING bit in RING_MI_MODE. Since gen9, this register is context saved and do we end up in the odd situation where we save the STOP_RING bit and so try to stop the engine again immediately upon resume. This is quite unexpected and causes us to complain about an early CS completion event! Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111514 Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190910080208.4223-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 12 ++++++++++++ drivers/gpu/drm/i915/gt/intel_lrc_reg.h | 2 ++ 2 files changed, 14 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index f073aea6a1fe2..64170ce0c91b1 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -2368,6 +2368,17 @@ static struct i915_request *active_request(struct i915_request *rq) return active; } +static void __execlists_reset_reg_state(const struct intel_context *ce, + const struct intel_engine_cs *engine) +{ + u32 *regs = ce->lrc_reg_state; + + if (INTEL_GEN(engine->i915) >= 9) { + regs[GEN9_CTX_RING_MI_MODE + 1] &= ~STOP_RING; + regs[GEN9_CTX_RING_MI_MODE + 1] |= STOP_RING << 16; + } +} + static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) { struct intel_engine_execlists * const execlists = &engine->execlists; @@ -2455,6 +2466,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) GEM_TRACE("%s replay {head:%04x, tail:%04x\n", engine->name, ce->ring->head, ce->ring->tail); intel_ring_update_space(ce->ring); + __execlists_reset_reg_state(ce, engine); __execlists_update_reg_state(ce, engine); mutex_release(&ce->pin_mutex.dep_map, 0, _THIS_IP_); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h index 68caf85418660..7e773e74a3fe0 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h @@ -39,6 +39,8 @@ #define CTX_R_PWR_CLK_STATE 0x42 #define CTX_END 0x44 +#define GEN9_CTX_RING_MI_MODE 0x54 + /* GEN12+ Reg State Context */ #define GEN12_CTX_BB_PER_CTX_PTR 0x12 #define GEN12_CTX_LRI_HEADER_3 0x41 From b0a7c754140bee6a1124b5bfad17205149942fd7 Mon Sep 17 00:00:00 2001 From: Swati Sharma Date: Mon, 9 Sep 2019 17:31:41 +0530 Subject: [PATCH 121/331] drm/i915/display: Add gamma precision function for CHV intel_color_get_gamma_bit_precision() is extended for cherryview by adding chv_gamma_precision(), i965 will use existing i9xx_gamma_precision() func only. Signed-off-by: Swati Sharma Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1568030503-26747-2-git-send-email-swati2.sharma@intel.com --- drivers/gpu/drm/i915/display/intel_color.c | 25 ++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index 6d641e1598993..4d9a5686a4d79 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -1400,6 +1400,14 @@ static int ilk_gamma_precision(const struct intel_crtc_state *crtc_state) } } +static int chv_gamma_precision(const struct intel_crtc_state *crtc_state) +{ + if (crtc_state->cgm_mode & CGM_PIPE_MODE_GAMMA) + return 10; + else + return i9xx_gamma_precision(crtc_state); +} + static int glk_gamma_precision(const struct intel_crtc_state *crtc_state) { switch (crtc_state->gamma_mode) { @@ -1421,12 +1429,17 @@ int intel_color_get_gamma_bit_precision(const struct intel_crtc_state *crtc_stat if (!crtc_state->gamma_enable) return 0; - if (HAS_GMCH(dev_priv) && !IS_CHERRYVIEW(dev_priv)) - return i9xx_gamma_precision(crtc_state); - else if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) - return glk_gamma_precision(crtc_state); - else if (IS_IRONLAKE(dev_priv)) - return ilk_gamma_precision(crtc_state); + if (HAS_GMCH(dev_priv)) { + if (IS_CHERRYVIEW(dev_priv)) + return chv_gamma_precision(crtc_state); + else + return i9xx_gamma_precision(crtc_state); + } else { + if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) + return glk_gamma_precision(crtc_state); + else if (IS_IRONLAKE(dev_priv)) + return ilk_gamma_precision(crtc_state); + } return 0; } From 8efd06989df45fc81eb675e18e05b579bf064e8a Mon Sep 17 00:00:00 2001 From: Swati Sharma Date: Mon, 9 Sep 2019 17:31:42 +0530 Subject: [PATCH 122/331] drm/i915/display: Extract i965_read_luts() For i965, add hw read out to create hw blob of gamma lut values. Review comments from old series: https://patchwork.freedesktop.org/series/58039/ v4: -No need to initialize *blob [Jani] -Removed right shifts [Jani] -Dropped dev local var [Jani] v5: -Returned blob instead of assigning it internally within the function [Ville] -Renamed i965_get_color_config() to i965_read_lut() [Ville] -Renamed i965_get_gamma_config_10p6() to i965_read_gamma_lut_10p6() [Ville] v9: -Typo and 80 character limit [Uma] -Made read func para as const [Ville, Uma] -Renamed i965_read_gamma_lut_10p6() to i965_read_lut_10p6() [Ville, Uma] v10: -Swapped ldw and udw while creating hw blob [Jani] -Added last index rgb lut value from PIPEGCMAX to h/w blob [Jani] Signed-off-by: Swati Sharma Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1568030503-26747-3-git-send-email-swati2.sharma@intel.com --- drivers/gpu/drm/i915/display/intel_color.c | 50 ++++++++++++++++++++++ drivers/gpu/drm/i915/i915_reg.h | 4 ++ 2 files changed, 54 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index 4d9a5686a4d79..765f858f66d7f 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -1569,6 +1569,55 @@ static void i9xx_read_luts(struct intel_crtc_state *crtc_state) crtc_state->base.gamma_lut = i9xx_read_lut_8(crtc_state); } +static struct drm_property_blob * +i965_read_lut_10p6(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + u32 lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; + enum pipe pipe = crtc->pipe; + struct drm_property_blob *blob; + struct drm_color_lut *blob_data; + u32 i, val1, val2; + + blob = drm_property_create_blob(&dev_priv->drm, + sizeof(struct drm_color_lut) * lut_size, + NULL); + if (IS_ERR(blob)) + return NULL; + + blob_data = blob->data; + + for (i = 0; i < lut_size - 1; i++) { + val1 = I915_READ(PALETTE(pipe, 2 * i + 0)); + val2 = I915_READ(PALETTE(pipe, 2 * i + 1)); + + blob_data[i].red = REG_FIELD_GET(PALETTE_RED_MASK, val2) << 8 | + REG_FIELD_GET(PALETTE_RED_MASK, val1); + blob_data[i].green = REG_FIELD_GET(PALETTE_GREEN_MASK, val2) << 8 | + REG_FIELD_GET(PALETTE_GREEN_MASK, val1); + blob_data[i].blue = REG_FIELD_GET(PALETTE_BLUE_MASK, val2) << 8 | + REG_FIELD_GET(PALETTE_BLUE_MASK, val1); + } + + blob_data[i].red = REG_FIELD_GET(PIPEGCMAX_RGB_MASK, + I915_READ(PIPEGCMAX(pipe, 0))); + blob_data[i].green = REG_FIELD_GET(PIPEGCMAX_RGB_MASK, + I915_READ(PIPEGCMAX(pipe, 1))); + blob_data[i].blue = REG_FIELD_GET(PIPEGCMAX_RGB_MASK, + I915_READ(PIPEGCMAX(pipe, 2))); + + return blob; +} + +static void i965_read_luts(struct intel_crtc_state *crtc_state) +{ + if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) + crtc_state->base.gamma_lut = i9xx_read_lut_8(crtc_state); + else + crtc_state->base.gamma_lut = i965_read_lut_10p6(crtc_state); +} + static struct drm_property_blob * ilk_read_lut_10(const struct intel_crtc_state *crtc_state) { @@ -1672,6 +1721,7 @@ void intel_color_init(struct intel_crtc *crtc) dev_priv->display.color_check = i9xx_color_check; dev_priv->display.color_commit = i9xx_color_commit; dev_priv->display.load_luts = i965_load_luts; + dev_priv->display.read_luts = i965_read_luts; } else { dev_priv->display.color_check = i9xx_color_check; dev_priv->display.color_commit = i9xx_color_commit; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 006cffd56be27..1dc5487593f1a 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3558,6 +3558,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define _PALETTE_A 0xa000 #define _PALETTE_B 0xa800 #define _CHV_PALETTE_C 0xc000 +#define PALETTE_RED_MASK REG_GENMASK(23, 16) +#define PALETTE_GREEN_MASK REG_GENMASK(15, 8) +#define PALETTE_BLUE_MASK REG_GENMASK(7, 0) #define PALETTE(pipe, i) _MMIO(DISPLAY_MMIO_BASE(dev_priv) + \ _PICK((pipe), _PALETTE_A, \ _PALETTE_B, _CHV_PALETTE_C) + \ @@ -5767,6 +5770,7 @@ enum { #define _PIPEAGCMAX 0x70010 #define _PIPEBGCMAX 0x71010 +#define PIPEGCMAX_RGB_MASK REG_GENMASK(15, 0) #define PIPEGCMAX(pipe, i) _MMIO_PIPE2(pipe, _PIPEAGCMAX + (i) * 4) #define _PIPE_MISC_A 0x70030 From 4d154d33941dad07b9bb49c9dee356342a98e00a Mon Sep 17 00:00:00 2001 From: Swati Sharma Date: Mon, 9 Sep 2019 17:31:43 +0530 Subject: [PATCH 123/331] drm/i915/display: Extract chv_read_luts() For cherryview, add hw read out to create hw blob of gamma lut values. Review comments from previous series: https://patchwork.freedesktop.org/patch/328252 v4: -No need to initialize *blob [Jani] -Removed right shifts [Jani] -Dropped dev local var [Jani] v5: -Returned blob instead of assigning it internally within the function [Ville] -Renamed function cherryview_get_color_config() to chv_read_luts() -Renamed cherryview_get_gamma_config() to chv_read_cgm_gamma_lut() [Ville] v9: -80 character limit [Uma] -Made read func para as const [Ville, Uma] -Renamed chv_read_cgm_gamma_lut() to chv_read_cgm_gamma_lut() [Ville, Uma] Signed-off-by: Swati Sharma Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1568030503-26747-4-git-send-email-swati2.sharma@intel.com --- drivers/gpu/drm/i915/display/intel_color.c | 43 ++++++++++++++++++++++ drivers/gpu/drm/i915/i915_reg.h | 3 ++ 2 files changed, 46 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index 765f858f66d7f..318308dc136cb 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -1618,6 +1618,48 @@ static void i965_read_luts(struct intel_crtc_state *crtc_state) crtc_state->base.gamma_lut = i965_read_lut_10p6(crtc_state); } +static struct drm_property_blob * +chv_read_cgm_lut(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + u32 lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; + enum pipe pipe = crtc->pipe; + struct drm_property_blob *blob; + struct drm_color_lut *blob_data; + u32 i, val; + + blob = drm_property_create_blob(&dev_priv->drm, + sizeof(struct drm_color_lut) * lut_size, + NULL); + if (IS_ERR(blob)) + return NULL; + + blob_data = blob->data; + + for (i = 0; i < lut_size; i++) { + val = I915_READ(CGM_PIPE_GAMMA(pipe, i, 0)); + blob_data[i].green = intel_color_lut_pack(REG_FIELD_GET( + CGM_PIPE_GAMMA_GREEN_MASK, val), 10); + blob_data[i].blue = intel_color_lut_pack(REG_FIELD_GET( + CGM_PIPE_GAMMA_BLUE_MASK, val), 10); + + val = I915_READ(CGM_PIPE_GAMMA(pipe, i, 1)); + blob_data[i].red = intel_color_lut_pack(REG_FIELD_GET( + CGM_PIPE_GAMMA_RED_MASK, val), 10); + } + + return blob; +} + +static void chv_read_luts(struct intel_crtc_state *crtc_state) +{ + if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) + crtc_state->base.gamma_lut = i9xx_read_lut_8(crtc_state); + else + crtc_state->base.gamma_lut = chv_read_cgm_lut(crtc_state); +} + static struct drm_property_blob * ilk_read_lut_10(const struct intel_crtc_state *crtc_state) { @@ -1717,6 +1759,7 @@ void intel_color_init(struct intel_crtc *crtc) dev_priv->display.color_check = chv_color_check; dev_priv->display.color_commit = i9xx_color_commit; dev_priv->display.load_luts = chv_load_luts; + dev_priv->display.read_luts = chv_read_luts; } else if (INTEL_GEN(dev_priv) >= 4) { dev_priv->display.color_check = i9xx_color_check; dev_priv->display.color_commit = i9xx_color_commit; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 1dc5487593f1a..bf37ecebc82f8 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -10421,6 +10421,9 @@ enum skl_power_gate { #define CGM_PIPE_MODE_GAMMA (1 << 2) #define CGM_PIPE_MODE_CSC (1 << 1) #define CGM_PIPE_MODE_DEGAMMA (1 << 0) +#define CGM_PIPE_GAMMA_RED_MASK REG_GENMASK(9, 0) +#define CGM_PIPE_GAMMA_GREEN_MASK REG_GENMASK(25, 16) +#define CGM_PIPE_GAMMA_BLUE_MASK REG_GENMASK(9, 0) #define _CGM_PIPE_B_CSC_COEFF01 (VLV_DISPLAY_BASE + 0x69900) #define _CGM_PIPE_B_CSC_COEFF23 (VLV_DISPLAY_BASE + 0x69904) From 198d2533669bbe162e52eee2c35ddd8594967556 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 7 Sep 2019 09:43:34 +0100 Subject: [PATCH 124/331] drm/i915/execlists: Ignore lost completion events Icelake hit an issue where it missed reporting a completion event and instead jumped straight to a idle->active event (skipping over the active->idle and not even hitting the lite-restore preemption). 661497511us : process_csb: rcs0 cs-irq head=11, tail=0 661497512us : process_csb: rcs0 csb[0]: status=0x10008002:0x00000020 [lite-restore] 661497512us : trace_ports: rcs0: preempted { 28cc8:11052, 0:0 } 661497513us : trace_ports: rcs0: promote { 28cc8:11054, 0:0 } 661497514us : __i915_request_submit: rcs0 fence 28cc8:11056, current 11052 661497514us : __execlists_submission_tasklet: rcs0: queue_priority_hint:-2147483648, submit:yes 661497515us : trace_ports: rcs0: submit { 28cc8:11056, 0:0 } 661497530us : process_csb: rcs0 cs-irq head=0, tail=1 661497530us : process_csb: rcs0 csb[1]: status=0x10008002:0x00000020 [lite-restore] 661497531us : trace_ports: rcs0: preempted { 28cc8:11054!, 0:0 } 661497535us : trace_ports: rcs0: promote { 28cc8:11056, 0:0 } 661497540us : __i915_request_submit: rcs0 fence 28cc8:11058, current 11054 661497544us : __execlists_submission_tasklet: rcs0: queue_priority_hint:-2147483648, submit:yes 661497545us : trace_ports: rcs0: submit { 28cc8:11058, 0:0 } 661497553us : process_csb: rcs0 cs-irq head=1, tail=2 661497553us : process_csb: rcs0 csb[2]: status=0x10000001:0x00000000 [idle->active] 661497574us : process_csb: process_csb:1538 GEM_BUG_ON(*execlists->active) Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190907084334.28952-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 63 +++++++++-------------------- 1 file changed, 18 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 64170ce0c91b1..dcdf7cf66e7ea 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -692,6 +692,9 @@ trace_ports(const struct intel_engine_execlists *execlists, const struct intel_engine_cs *engine = container_of(execlists, typeof(*engine), execlists); + if (!ports[0]) + return; + GEM_TRACE("%s: %s { %llx:%lld%s, %llx:%lld }\n", engine->name, msg, ports[0]->fence.context, @@ -1380,13 +1383,6 @@ reset_in_progress(const struct intel_engine_execlists *execlists) return unlikely(!__tasklet_is_enabled(&execlists->tasklet)); } -enum csb_step { - CSB_NOP, - CSB_PROMOTE, - CSB_PREEMPT, - CSB_COMPLETE, -}; - /* * Starting with Gen12, the status has a new format: * @@ -1413,7 +1409,7 @@ enum csb_step { * bits 47-57: sw context id of the lrc the GT switched away from * bits 58-63: sw counter of the lrc the GT switched away from */ -static inline enum csb_step +static inline bool gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) { u32 lower_dw = csb[0]; @@ -1423,7 +1419,7 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE; if (!ctx_away_valid && ctx_to_valid) - return CSB_PROMOTE; + return true; /* * The context switch detail is not guaranteed to be 5 when a preemption @@ -1433,7 +1429,7 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) * would require some extra handling, but we don't support that. */ if (new_queue && ctx_away_valid) - return CSB_PREEMPT; + return true; /* * switch detail = 5 is covered by the case above and we do not expect a @@ -1442,29 +1438,13 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) */ GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw)); - if (*execlists->active) { - GEM_BUG_ON(!ctx_away_valid); - return CSB_COMPLETE; - } - - return CSB_NOP; + return false; } -static inline enum csb_step +static inline bool gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) { - unsigned int status = *csb; - - if (status & GEN8_CTX_STATUS_IDLE_ACTIVE) - return CSB_PROMOTE; - - if (status & GEN8_CTX_STATUS_PREEMPTED) - return CSB_PREEMPT; - - if (*execlists->active) - return CSB_COMPLETE; - - return CSB_NOP; + return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED); } static void process_csb(struct intel_engine_cs *engine) @@ -1503,7 +1483,7 @@ static void process_csb(struct intel_engine_cs *engine) rmb(); do { - enum csb_step csb_step; + bool promote; if (++head == num_entries) head = 0; @@ -1531,20 +1511,16 @@ static void process_csb(struct intel_engine_cs *engine) buf[2 * head + 0], buf[2 * head + 1]); if (INTEL_GEN(engine->i915) >= 12) - csb_step = gen12_csb_parse(execlists, buf + 2 * head); + promote = gen12_csb_parse(execlists, buf + 2 * head); else - csb_step = gen8_csb_parse(execlists, buf + 2 * head); - - switch (csb_step) { - case CSB_PREEMPT: /* cancel old inflight, prepare for switch */ + promote = gen8_csb_parse(execlists, buf + 2 * head); + if (promote) { + /* cancel old inflight, prepare for switch */ trace_ports(execlists, "preempted", execlists->active); - while (*execlists->active) execlists_schedule_out(*execlists->active++); - /* fallthrough */ - case CSB_PROMOTE: /* switch pending to inflight */ - GEM_BUG_ON(*execlists->active); + /* switch pending to inflight */ GEM_BUG_ON(!assert_pending_valid(execlists, "promote")); execlists->active = memcpy(execlists->inflight, @@ -1559,9 +1535,10 @@ static void process_csb(struct intel_engine_cs *engine) ring_set_paused(engine, 0); WRITE_ONCE(execlists->pending[0], NULL); - break; + } else { + GEM_BUG_ON(!*execlists->active); - case CSB_COMPLETE: /* port0 completed, advanced to port1 */ + /* port0 completed, advanced to port1 */ trace_ports(execlists, "completed", execlists->active); /* @@ -1576,10 +1553,6 @@ static void process_csb(struct intel_engine_cs *engine) GEM_BUG_ON(execlists->active - execlists->inflight > execlists_num_ports(execlists)); - break; - - case CSB_NOP: - break; } } while (head != tail); From 0efa99dd58754d23e884b9ba41cd601f01b58c3d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 9 Sep 2019 12:30:18 +0100 Subject: [PATCH 125/331] drm/i915/ringbuffer: Flush writes before RING_TAIL update Be paranoid and make sure we flush any and all writes out of the WCB before performing the UC mmio to update the RING_TAIL. (An UC write should itself be enough to do the flush, hence the paranoia here.) Quite infrequently, we see problems where the GPU seems to overshoot the RING_TAIL and so executes garbage, hence the speculation. References: https://bugs.freedesktop.org/show_bug.cgi?id=111598 References: https://bugs.freedesktop.org/show_bug.cgi?id=111417 References: https://bugs.freedesktop.org/show_bug.cgi?id=111034 Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190909113018.13300-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index ac55a0d054bdb..a73296e6b13d5 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -930,6 +930,7 @@ static void cancel_requests(struct intel_engine_cs *engine) static void i9xx_submit_request(struct i915_request *request) { i915_request_submit(request); + wmb(); /* paranoid flush writes out of the WCB before mmio */ ENGINE_WRITE(request->engine, RING_TAIL, intel_ring_set_tail(request->ring, request->tail)); From cec5ca08e36fd18d2939b98055346b3b06f56c6c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 9 Sep 2019 12:00:08 +0100 Subject: [PATCH 126/331] drm/i915: Perform GGTT restore much earlier during resume As soon as we re-enable the various functions within the HW, they may go off and read data via a GGTT offset. Hence, if we have not yet restored the GGTT PTE before then, they may read and even *write* random locations in memory. Detected by DMAR faults during resume. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Martin Peres Cc: Joonas Lahtinen Cc: stable@vger.kernel.org Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190909110011.8958-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_pm.c | 3 --- drivers/gpu/drm/i915/i915_drv.c | 5 +++++ drivers/gpu/drm/i915/selftests/i915_gem.c | 6 ++++++ 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index b3993d24b83db..9b1129aaacfe5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -242,9 +242,6 @@ void i915_gem_resume(struct drm_i915_private *i915) mutex_lock(&i915->drm.struct_mutex); intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); - i915_gem_restore_gtt_mappings(i915); - i915_gem_restore_fences(i915); - if (i915_gem_init_hw(i915)) goto err_wedged; diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 63e52e1534143..0aadff9b8c88e 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1914,6 +1914,11 @@ static int i915_drm_resume(struct drm_device *dev) if (ret) DRM_ERROR("failed to re-enable GGTT\n"); + mutex_lock(&dev_priv->drm.struct_mutex); + i915_gem_restore_gtt_mappings(dev_priv); + i915_gem_restore_fences(dev_priv); + mutex_unlock(&dev_priv->drm.struct_mutex); + intel_csr_ucode_resume(dev_priv); i915_restore_state(dev_priv); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c index bb6dd54a6ff3e..37593831b5394 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c @@ -118,6 +118,12 @@ static void pm_resume(struct drm_i915_private *i915) with_intel_runtime_pm(&i915->runtime_pm, wakeref) { intel_gt_sanitize(&i915->gt, false); i915_gem_sanitize(i915); + + mutex_lock(&i915->drm.struct_mutex); + i915_gem_restore_gtt_mappings(i915); + i915_gem_restore_fences(i915); + mutex_unlock(&i915->drm.struct_mutex); + i915_gem_resume(i915); } } From 7c465310fefc6a600eb69521b8420564f0f37b2d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 9 Sep 2019 12:00:06 +0100 Subject: [PATCH 127/331] drm/i915/selftests: Take runtime wakeref for igt_ggtt_lowlevel Being a "low-level" test, we opt to bypass the normal bind/unbind hooks for the lower level insert_entries/clear_range. For ggtt, the bind/unbind hooks provide the runtime wakeref and so we must also handle this in exercising the low level hooks. <4> [538.151672] RPM raw-wakeref not held <4> [538.151825] WARNING: CPU: 0 PID: 11 at ./drivers/gpu/drm/i915/intel_runtime_pm.h:107 fwtable_read32+0x1be/0x300 [i915] <4> [538.151830] Modules linked in: i915(+) amdgpu gpu_sched ttm vgem snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic mei_hdcp btusb btrtl btbcm x86_pkg_temp_thermal coretemp btintel crct10dif_pclmul bluetooth crc32_pclmul snd_intel_nhlt snd_hda_codec ecdh_generic ghash_clmulni_intel ecc snd_hwdep snd_hda_core lpc_ich r8169 realtek snd_pcm mei_me mei prime_numbers pinctrl_broxton pinctrl_intel [last unloaded: i915] <4> [538.151861] CPU: 0 PID: 11 Comm: migration/0 Tainted: G U 5.3.0-rc7-CI-Trybot_4938+ #1 <4> [538.151864] Hardware name: Intel corporation NUC6CAYS/NUC6CAYB, BIOS AYAPLCEL.86A.0056.2018.0926.1100 09/26/2018 <4> [538.151960] RIP: 0010:fwtable_read32+0x1be/0x300 [i915] <4> [538.151965] Code: e8 e7 f9 5f e0 e9 0b ff ff ff 80 3d d5 8d 26 00 00 0f 85 81 fe ff ff 48 c7 c7 ef 01 bd a0 c6 05 c1 8d 26 00 01 e8 b2 e4 6a e0 <0f> 0b e9 67 fe ff ff 80 3d ad 8d 26 00 00 0f 85 65 fe ff ff 48 c7 <4> [538.151969] RSP: 0018:ffffc9000007be10 EFLAGS: 00010086 <4> [538.151972] RAX: 0000000000000000 RBX: ffff88826be10d50 RCX: 0000000000000002 <4> [538.151975] RDX: 0000000080000002 RSI: 0000000000000000 RDI: 00000000ffffffff <4> [538.151978] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 <4> [538.151981] R10: 0000000000000000 R11: ffffc9000007bcb0 R12: 0000000000101008 <4> [538.151984] R13: 0000000000000000 R14: ffffc9000036f638 R15: 0000000000000002 <4> [538.151987] FS: 0000000000000000(0000) GS:ffff888277a00000(0000) knlGS:0000000000000000 <4> [538.151990] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4> [538.151993] CR2: 00007fd48e7052f8 CR3: 0000000005210000 CR4: 00000000003406f0 <4> [538.151995] Call Trace: <4> [538.152106] bxt_vtd_ggtt_clear_range__cb+0x38/0x40 [i915] Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20190909110011.8958-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 31a51ca1ddcbb..598c18d10640a 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -293,18 +293,20 @@ static int lowlevel_hole(struct drm_i915_private *i915, mock_vma.node.size = BIT_ULL(size); mock_vma.node.start = addr; - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - vm->insert_entries(vm, &mock_vma, I915_CACHE_NONE, 0); - intel_runtime_pm_put(&i915->runtime_pm, wakeref); + with_intel_runtime_pm(&i915->runtime_pm, wakeref) + vm->insert_entries(vm, &mock_vma, + I915_CACHE_NONE, 0); } count = n; i915_random_reorder(order, count, &prng); for (n = 0; n < count; n++) { u64 addr = hole_start + order[n] * BIT_ULL(size); + intel_wakeref_t wakeref; GEM_BUG_ON(addr + BIT_ULL(size) > vm->total); - vm->clear_range(vm, addr, BIT_ULL(size)); + with_intel_runtime_pm(&i915->runtime_pm, wakeref) + vm->clear_range(vm, addr, BIT_ULL(size)); } i915_gem_object_unpin_pages(obj); From 07e98eb0a17482557198ae12a07a6e3a89de33a3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 10 Sep 2019 13:10:09 +0100 Subject: [PATCH 128/331] drm/i915/selftests: Tighten the timeout testing for partial mmaps Currently, if there is time remaining before the start of the loop, we do one full iteration over many possible different chunks within the object. A full loop may take 50+s (depending on speed of indirect GTT mmapings) and we try separately with LINEAR, X and Y -- at which point igt times out. If we check more frequently, we will interrupt the loop upon our timeout -- it is hard to argue for as this significantly reduces the test coverage as we dramatically reduce the runtime. In practical terms, the coverage we should prioritise is in using different fence setups, forcing verification of the tile row computations over the current preference of checking extracting chunks. Though the exhaustive search is great given an infinite timeout, to improve our current coverage, we also add a randomised smoketest of partial mmaps. So let's do both, add a randomised smoketest of partial tiling chunks and the exhaustive (though time limited) search for failures. Even in adding another subtest, we should shave 100s off BAT! (With, hopefully, no loss in coverage, at least over multiple runs.) Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20190910121009.13431-1-chris@chris-wilson.co.uk --- .../drm/i915/gem/selftests/i915_gem_mman.c | 255 +++++++++++++++--- 1 file changed, 223 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 1d27babff0cee..aefe557527f81 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -10,6 +10,7 @@ #include "gt/intel_gt_pm.h" #include "huge_gem_object.h" #include "i915_selftest.h" +#include "selftests/i915_random.h" #include "selftests/igt_flush_test.h" struct tile { @@ -76,18 +77,103 @@ static u64 tiled_offset(const struct tile *tile, u64 v) static int check_partial_mapping(struct drm_i915_gem_object *obj, const struct tile *tile, - unsigned long end_time) + struct rnd_state *prng) { - const unsigned int nreal = obj->scratch / PAGE_SIZE; const unsigned long npages = obj->base.size / PAGE_SIZE; + struct i915_ggtt_view view; struct i915_vma *vma; unsigned long page; + u32 __iomem *io; + struct page *p; + unsigned int n; + u64 offset; + u32 *cpu; int err; - if (igt_timeout(end_time, - "%s: timed out before tiling=%d stride=%d\n", - __func__, tile->tiling, tile->stride)) - return -EINTR; + err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride); + if (err) { + pr_err("Failed to set tiling mode=%u, stride=%u, err=%d\n", + tile->tiling, tile->stride, err); + return err; + } + + GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling); + GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride); + + i915_gem_object_lock(obj); + err = i915_gem_object_set_to_gtt_domain(obj, true); + i915_gem_object_unlock(obj); + if (err) { + pr_err("Failed to flush to GTT write domain; err=%d\n", err); + return err; + } + + page = i915_prandom_u32_max_state(npages, prng); + view = compute_partial_view(obj, page, MIN_CHUNK_PAGES); + + vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE); + if (IS_ERR(vma)) { + pr_err("Failed to pin partial view: offset=%lu; err=%d\n", + page, (int)PTR_ERR(vma)); + return PTR_ERR(vma); + } + + n = page - view.partial.offset; + GEM_BUG_ON(n >= view.partial.size); + + io = i915_vma_pin_iomap(vma); + i915_vma_unpin(vma); + if (IS_ERR(io)) { + pr_err("Failed to iomap partial view: offset=%lu; err=%d\n", + page, (int)PTR_ERR(io)); + err = PTR_ERR(io); + goto out; + } + + iowrite32(page, io + n * PAGE_SIZE / sizeof(*io)); + i915_vma_unpin_iomap(vma); + + offset = tiled_offset(tile, page << PAGE_SHIFT); + if (offset >= obj->base.size) + goto out; + + intel_gt_flush_ggtt_writes(&to_i915(obj->base.dev)->gt); + + p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); + cpu = kmap(p) + offset_in_page(offset); + drm_clflush_virt_range(cpu, sizeof(*cpu)); + if (*cpu != (u32)page) { + pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, row size %u], fence=%d, tiling=%d, stride=%d) misalignment, expected write to page (%llu + %u [0x%llx]) of 0x%x, found 0x%x\n", + page, n, + view.partial.offset, + view.partial.size, + vma->size >> PAGE_SHIFT, + tile->tiling ? tile_row_pages(obj) : 0, + vma->fence ? vma->fence->id : -1, tile->tiling, tile->stride, + offset >> PAGE_SHIFT, + (unsigned int)offset_in_page(offset), + offset, + (u32)page, *cpu); + err = -EINVAL; + } + *cpu = 0; + drm_clflush_virt_range(cpu, sizeof(*cpu)); + kunmap(p); + +out: + i915_vma_destroy(vma); + return err; +} + +static int check_partial_mappings(struct drm_i915_gem_object *obj, + const struct tile *tile, + unsigned long end_time) +{ + const unsigned int nreal = obj->scratch / PAGE_SIZE; + const unsigned long npages = obj->base.size / PAGE_SIZE; + struct i915_vma *vma; + unsigned long page; + int err; err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride); if (err) { @@ -170,11 +256,42 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, return err; i915_vma_destroy(vma); + + if (igt_timeout(end_time, + "%s: timed out after tiling=%d stride=%d\n", + __func__, tile->tiling, tile->stride)) + return -EINTR; } return 0; } +static unsigned int +setup_tile_size(struct tile *tile, struct drm_i915_private *i915) +{ + if (INTEL_GEN(i915) <= 2) { + tile->height = 16; + tile->width = 128; + tile->size = 11; + } else if (tile->tiling == I915_TILING_Y && + HAS_128_BYTE_Y_TILING(i915)) { + tile->height = 32; + tile->width = 128; + tile->size = 12; + } else { + tile->height = 8; + tile->width = 512; + tile->size = 12; + } + + if (INTEL_GEN(i915) < 4) + return 8192 / tile->width; + else if (INTEL_GEN(i915) < 7) + return 128 * I965_FENCE_MAX_PITCH_VAL / tile->width; + else + return 128 * GEN7_FENCE_MAX_PITCH_VAL / tile->width; +} + static int igt_partial_tiling(void *arg) { const unsigned int nreal = 1 << 12; /* largest tile row x2 */ @@ -219,7 +336,7 @@ static int igt_partial_tiling(void *arg) tile.swizzle = I915_BIT_6_SWIZZLE_NONE; tile.tiling = I915_TILING_NONE; - err = check_partial_mapping(obj, &tile, end); + err = check_partial_mappings(obj, &tile, end); if (err && err != -EINTR) goto out_unlock; } @@ -253,31 +370,11 @@ static int igt_partial_tiling(void *arg) tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17) continue; - if (INTEL_GEN(i915) <= 2) { - tile.height = 16; - tile.width = 128; - tile.size = 11; - } else if (tile.tiling == I915_TILING_Y && - HAS_128_BYTE_Y_TILING(i915)) { - tile.height = 32; - tile.width = 128; - tile.size = 12; - } else { - tile.height = 8; - tile.width = 512; - tile.size = 12; - } - - if (INTEL_GEN(i915) < 4) - max_pitch = 8192 / tile.width; - else if (INTEL_GEN(i915) < 7) - max_pitch = 128 * I965_FENCE_MAX_PITCH_VAL / tile.width; - else - max_pitch = 128 * GEN7_FENCE_MAX_PITCH_VAL / tile.width; + max_pitch = setup_tile_size(&tile, i915); for (pitch = max_pitch; pitch; pitch >>= 1) { tile.stride = tile.width * pitch; - err = check_partial_mapping(obj, &tile, end); + err = check_partial_mappings(obj, &tile, end); if (err == -EINTR) goto next_tiling; if (err) @@ -285,7 +382,7 @@ static int igt_partial_tiling(void *arg) if (pitch > 2 && INTEL_GEN(i915) >= 4) { tile.stride = tile.width * (pitch - 1); - err = check_partial_mapping(obj, &tile, end); + err = check_partial_mappings(obj, &tile, end); if (err == -EINTR) goto next_tiling; if (err) @@ -294,7 +391,7 @@ static int igt_partial_tiling(void *arg) if (pitch < max_pitch && INTEL_GEN(i915) >= 4) { tile.stride = tile.width * (pitch + 1); - err = check_partial_mapping(obj, &tile, end); + err = check_partial_mappings(obj, &tile, end); if (err == -EINTR) goto next_tiling; if (err) @@ -305,7 +402,7 @@ static int igt_partial_tiling(void *arg) if (INTEL_GEN(i915) >= 4) { for_each_prime_number(pitch, max_pitch) { tile.stride = tile.width * pitch; - err = check_partial_mapping(obj, &tile, end); + err = check_partial_mappings(obj, &tile, end); if (err == -EINTR) goto next_tiling; if (err) @@ -325,6 +422,99 @@ next_tiling: ; return err; } +static int igt_smoke_tiling(void *arg) +{ + const unsigned int nreal = 1 << 12; /* largest tile row x2 */ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + intel_wakeref_t wakeref; + I915_RND_STATE(prng); + unsigned long count; + IGT_TIMEOUT(end); + int err; + + /* + * igt_partial_tiling() does an exhastive check of partial tiling + * chunking, but will undoubtably run out of time. Here, we do a + * randomised search and hope over many runs of 1s with different + * seeds we will do a thorough check. + * + * Remember to look at the st_seed if we see a flip-flop in BAT! + */ + + if (i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) + return 0; + + obj = huge_gem_object(i915, + nreal << PAGE_SHIFT, + (1 + next_prime_number(i915->ggtt.vm.total >> PAGE_SHIFT)) << PAGE_SHIFT); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) { + pr_err("Failed to allocate %u pages (%lu total), err=%d\n", + nreal, obj->base.size / PAGE_SIZE, err); + goto out; + } + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(&i915->runtime_pm); + + count = 0; + do { + struct tile tile; + + tile.tiling = + i915_prandom_u32_max_state(I915_TILING_Y + 1, &prng); + switch (tile.tiling) { + case I915_TILING_NONE: + tile.height = 1; + tile.width = 1; + tile.size = 0; + tile.stride = 0; + tile.swizzle = I915_BIT_6_SWIZZLE_NONE; + break; + + case I915_TILING_X: + tile.swizzle = i915->mm.bit_6_swizzle_x; + break; + case I915_TILING_Y: + tile.swizzle = i915->mm.bit_6_swizzle_y; + break; + } + + if (tile.swizzle == I915_BIT_6_SWIZZLE_9_17 || + tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17) + continue; + + if (tile.tiling != I915_TILING_NONE) { + unsigned int max_pitch = setup_tile_size(&tile, i915); + + tile.stride = + i915_prandom_u32_max_state(max_pitch, &prng); + tile.stride = (1 + tile.stride) * tile.width; + if (INTEL_GEN(i915) < 4) + tile.stride = rounddown_pow_of_two(tile.stride); + } + + err = check_partial_mapping(obj, &tile, &prng); + if (err) + break; + + count++; + } while (!__igt_timeout(end, NULL)); + + pr_info("%s: Completed %lu trials\n", __func__, count); + + intel_runtime_pm_put(&i915->runtime_pm, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + i915_gem_object_unpin_pages(obj); +out: + i915_gem_object_put(obj); + return err; +} + static int make_obj_busy(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); @@ -515,6 +705,7 @@ int i915_gem_mman_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_partial_tiling), + SUBTEST(igt_smoke_tiling), SUBTEST(igt_mmap_offset_exhaustion), }; From ab37c4d712c8b35c0c1e3237e04fe49f717055a4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 10 Sep 2019 17:16:57 +0100 Subject: [PATCH 129/331] drm/i915/tgl: Disable rc6 for debugging Empirical evidence from CI tells us that our rc6 setup for Tigerlake is off. Disable rc6 on tgl temporary so that we gain CI coverage as we prepare a fix. It also appears that the BIOS on our tgl leaves rc6 enabled, so we have to explicitly disable it on init. References: https://bugs.freedesktop.org/show_bug.cgi?id=111593 Signed-off-by: Chris Wilson Cc: Mika Kuoppala Acked-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20190910161657.23037-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_pci.c | 1 + drivers/gpu/drm/i915/intel_pm.c | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index fbe98a2db88e1..b3cc8560696b6 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -797,6 +797,7 @@ static const struct intel_device_info intel_tigerlake_12_info = { .display.has_modular_fia = 1, .engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2), + .has_rc6 = false, /* XXX disabled for debugging */ }; #undef GEN diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 528e90ed5de40..54a8b7705bc68 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -8671,8 +8671,7 @@ void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) { mutex_lock(&dev_priv->gt_pm.rps.lock); - if (HAS_RC6(dev_priv)) - intel_disable_rc6(dev_priv); + intel_disable_rc6(dev_priv); intel_disable_rps(dev_priv); if (HAS_LLC(dev_priv)) From 71dc367e2bc37e1c235029003fbbc56a07fb6d58 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 10 Sep 2019 09:05:20 -0700 Subject: [PATCH 130/331] drm/i915: Consolidate bxt/cnl/icl cdclk readout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Aside from a few minor register changes and some different clock values, cdclk design hasn't changed much since gen9lp. Let's consolidate the handlers for bxt, cnl, and icl to keep the codeflow consistent. Also, while we're at it, s/bxt_de_pll_update/bxt_de_pll_readout/ since "update" makes me think we should be writing to hardware rather than reading from it. v2: - Fix icl_calc_voltage_level() limits. (Ville) - Use CNL_CDCLK_PLL_RATIO_MASK rather than BXT_DE_PLL_RATIO_MASK on gen10+ to avoid confusion. (Ville) v3: - Also fix ehl_calc_voltage_level() limits. (Ville) Cc: Ville Syrjälä Suggested-by: Ville Syrjälä Signed-off-by: Matt Roper Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190910160520.6587-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/display/intel_cdclk.c | 325 +++++++++------------ 1 file changed, 138 insertions(+), 187 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index d3e56628af70b..dfcb1cc58951e 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -1190,6 +1190,36 @@ static u8 bxt_calc_voltage_level(int cdclk) return DIV_ROUND_UP(cdclk, 25000); } +static u8 cnl_calc_voltage_level(int cdclk) +{ + if (cdclk > 336000) + return 2; + else if (cdclk > 168000) + return 1; + else + return 0; +} + +static u8 icl_calc_voltage_level(int cdclk) +{ + if (cdclk > 556800) + return 2; + else if (cdclk > 312000) + return 1; + else + return 0; +} + +static u8 ehl_calc_voltage_level(int cdclk) +{ + if (cdclk > 312000) + return 2; + else if (cdclk > 180000) + return 1; + else + return 0; +} + static int bxt_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) { int ratio; @@ -1236,23 +1266,69 @@ static int glk_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) return dev_priv->cdclk.hw.ref * ratio; } -static void bxt_de_pll_update(struct drm_i915_private *dev_priv, - struct intel_cdclk_state *cdclk_state) +static void cnl_readout_refclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { - u32 val; + if (I915_READ(SKL_DSSM) & CNL_DSSM_CDCLK_PLL_REFCLK_24MHz) + cdclk_state->ref = 24000; + else + cdclk_state->ref = 19200; +} - cdclk_state->ref = 19200; - cdclk_state->vco = 0; +static void icl_readout_refclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + u32 dssm = I915_READ(SKL_DSSM) & ICL_DSSM_CDCLK_PLL_REFCLK_MASK; + + switch (dssm) { + default: + MISSING_CASE(dssm); + /* fall through */ + case ICL_DSSM_CDCLK_PLL_REFCLK_24MHz: + cdclk_state->ref = 24000; + break; + case ICL_DSSM_CDCLK_PLL_REFCLK_19_2MHz: + cdclk_state->ref = 19200; + break; + case ICL_DSSM_CDCLK_PLL_REFCLK_38_4MHz: + cdclk_state->ref = 38400; + break; + } +} + +static void bxt_de_pll_readout(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + u32 val, ratio; + + if (INTEL_GEN(dev_priv) >= 11) + icl_readout_refclk(dev_priv, cdclk_state); + else if (IS_CANNONLAKE(dev_priv)) + cnl_readout_refclk(dev_priv, cdclk_state); + else + cdclk_state->ref = 19200; val = I915_READ(BXT_DE_PLL_ENABLE); - if ((val & BXT_DE_PLL_PLL_ENABLE) == 0) + if ((val & BXT_DE_PLL_PLL_ENABLE) == 0 || + (val & BXT_DE_PLL_LOCK) == 0) { + /* + * CDCLK PLL is disabled, the VCO/ratio doesn't matter, but + * setting it to zero is a way to signal that. + */ + cdclk_state->vco = 0; return; + } - if (WARN_ON((val & BXT_DE_PLL_LOCK) == 0)) - return; + /* + * CNL+ have the ratio directly in the PLL enable register, gen9lp had + * it in a separate PLL control register. + */ + if (INTEL_GEN(dev_priv) >= 10) + ratio = val & CNL_CDCLK_PLL_RATIO_MASK; + else + ratio = I915_READ(BXT_DE_PLL_CTL) & BXT_DE_PLL_RATIO_MASK; - val = I915_READ(BXT_DE_PLL_CTL); - cdclk_state->vco = (val & BXT_DE_PLL_RATIO_MASK) * cdclk_state->ref; + cdclk_state->vco = ratio * cdclk_state->ref; } static void bxt_get_cdclk(struct drm_i915_private *dev_priv, @@ -1261,12 +1337,18 @@ static void bxt_get_cdclk(struct drm_i915_private *dev_priv, u32 divider; int div; - bxt_de_pll_update(dev_priv, cdclk_state); - - cdclk_state->cdclk = cdclk_state->bypass = cdclk_state->ref; + if (INTEL_GEN(dev_priv) >= 12) + cdclk_state->bypass = cdclk_state->ref / 2; + else if (INTEL_GEN(dev_priv) >= 11) + cdclk_state->bypass = 50000; + else + cdclk_state->bypass = cdclk_state->ref; - if (cdclk_state->vco == 0) + bxt_de_pll_readout(dev_priv, cdclk_state); + if (cdclk_state->vco == 0) { + cdclk_state->cdclk = cdclk_state->bypass; goto out; + } divider = I915_READ(CDCLK_CTL) & BXT_CDCLK_CD2X_DIV_SEL_MASK; @@ -1275,13 +1357,15 @@ static void bxt_get_cdclk(struct drm_i915_private *dev_priv, div = 2; break; case BXT_CDCLK_CD2X_DIV_SEL_1_5: - WARN(IS_GEMINILAKE(dev_priv), "Unsupported divider\n"); + WARN(IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 10, + "Unsupported divider\n"); div = 3; break; case BXT_CDCLK_CD2X_DIV_SEL_2: div = 4; break; case BXT_CDCLK_CD2X_DIV_SEL_4: + WARN(INTEL_GEN(dev_priv) >= 10, "Unsupported divider\n"); div = 8; break; default: @@ -1296,8 +1380,18 @@ static void bxt_get_cdclk(struct drm_i915_private *dev_priv, * Can't read this out :( Let's assume it's * at least what the CDCLK frequency requires. */ - cdclk_state->voltage_level = - bxt_calc_voltage_level(cdclk_state->cdclk); + if (IS_ELKHARTLAKE(dev_priv)) + cdclk_state->voltage_level = + ehl_calc_voltage_level(cdclk_state->cdclk); + else if (INTEL_GEN(dev_priv) >= 11) + cdclk_state->voltage_level = + icl_calc_voltage_level(cdclk_state->cdclk); + else if (INTEL_GEN(dev_priv) >= 10) + cdclk_state->voltage_level = + cnl_calc_voltage_level(cdclk_state->cdclk); + else + cdclk_state->voltage_level = + bxt_calc_voltage_level(cdclk_state->cdclk); } static void bxt_de_pll_disable(struct drm_i915_private *dev_priv) @@ -1515,76 +1609,6 @@ static int cnl_calc_cdclk(int min_cdclk) return 168000; } -static u8 cnl_calc_voltage_level(int cdclk) -{ - if (cdclk > 336000) - return 2; - else if (cdclk > 168000) - return 1; - else - return 0; -} - -static void cnl_cdclk_pll_update(struct drm_i915_private *dev_priv, - struct intel_cdclk_state *cdclk_state) -{ - u32 val; - - if (I915_READ(SKL_DSSM) & CNL_DSSM_CDCLK_PLL_REFCLK_24MHz) - cdclk_state->ref = 24000; - else - cdclk_state->ref = 19200; - - cdclk_state->vco = 0; - - val = I915_READ(BXT_DE_PLL_ENABLE); - if ((val & BXT_DE_PLL_PLL_ENABLE) == 0) - return; - - if (WARN_ON((val & BXT_DE_PLL_LOCK) == 0)) - return; - - cdclk_state->vco = (val & CNL_CDCLK_PLL_RATIO_MASK) * cdclk_state->ref; -} - -static void cnl_get_cdclk(struct drm_i915_private *dev_priv, - struct intel_cdclk_state *cdclk_state) -{ - u32 divider; - int div; - - cnl_cdclk_pll_update(dev_priv, cdclk_state); - - cdclk_state->cdclk = cdclk_state->bypass = cdclk_state->ref; - - if (cdclk_state->vco == 0) - goto out; - - divider = I915_READ(CDCLK_CTL) & BXT_CDCLK_CD2X_DIV_SEL_MASK; - - switch (divider) { - case BXT_CDCLK_CD2X_DIV_SEL_1: - div = 2; - break; - case BXT_CDCLK_CD2X_DIV_SEL_2: - div = 4; - break; - default: - MISSING_CASE(divider); - return; - } - - cdclk_state->cdclk = DIV_ROUND_CLOSEST(cdclk_state->vco, div); - - out: - /* - * Can't read this out :( Let's assume it's - * at least what the CDCLK frequency requires. - */ - cdclk_state->voltage_level = - cnl_calc_voltage_level(cdclk_state->cdclk); -} - static void cnl_cdclk_pll_disable(struct drm_i915_private *dev_priv) { u32 val; @@ -1830,91 +1854,6 @@ static int icl_calc_cdclk_pll_vco(struct drm_i915_private *dev_priv, int cdclk) return dev_priv->cdclk.hw.ref * ratio; } -static u8 icl_calc_voltage_level(struct drm_i915_private *dev_priv, int cdclk) -{ - if (IS_ELKHARTLAKE(dev_priv)) { - if (cdclk > 312000) - return 2; - else if (cdclk > 180000) - return 1; - else - return 0; - } else { - if (cdclk > 556800) - return 2; - else if (cdclk > 312000) - return 1; - else - return 0; - } -} - -static void icl_get_cdclk(struct drm_i915_private *dev_priv, - struct intel_cdclk_state *cdclk_state) -{ - u32 val; - int div; - - val = I915_READ(SKL_DSSM); - switch (val & ICL_DSSM_CDCLK_PLL_REFCLK_MASK) { - default: - MISSING_CASE(val); - /* fall through */ - case ICL_DSSM_CDCLK_PLL_REFCLK_24MHz: - cdclk_state->ref = 24000; - break; - case ICL_DSSM_CDCLK_PLL_REFCLK_19_2MHz: - cdclk_state->ref = 19200; - break; - case ICL_DSSM_CDCLK_PLL_REFCLK_38_4MHz: - cdclk_state->ref = 38400; - break; - } - - if (INTEL_GEN(dev_priv) >= 12) - cdclk_state->bypass = cdclk_state->ref / 2; - else - cdclk_state->bypass = 50000; - - val = I915_READ(BXT_DE_PLL_ENABLE); - if ((val & BXT_DE_PLL_PLL_ENABLE) == 0 || - (val & BXT_DE_PLL_LOCK) == 0) { - /* - * CDCLK PLL is disabled, the VCO/ratio doesn't matter, but - * setting it to zero is a way to signal that. - */ - cdclk_state->vco = 0; - cdclk_state->cdclk = cdclk_state->bypass; - goto out; - } - - cdclk_state->vco = (val & BXT_DE_PLL_RATIO_MASK) * cdclk_state->ref; - - val = I915_READ(CDCLK_CTL) & BXT_CDCLK_CD2X_DIV_SEL_MASK; - switch (val) { - case BXT_CDCLK_CD2X_DIV_SEL_1: - div = 2; - break; - case BXT_CDCLK_CD2X_DIV_SEL_2: - div = 4; - break; - default: - MISSING_CASE(val); - div = 2; - break; - } - - cdclk_state->cdclk = DIV_ROUND_CLOSEST(cdclk_state->vco, div); - -out: - /* - * Can't read this out :( Let's assume it's - * at least what the CDCLK frequency requires. - */ - cdclk_state->voltage_level = - icl_calc_voltage_level(dev_priv, cdclk_state->cdclk); -} - static void icl_init_cdclk(struct drm_i915_private *dev_priv) { struct intel_cdclk_state sanitized_state; @@ -1946,9 +1885,12 @@ static void icl_init_cdclk(struct drm_i915_private *dev_priv) sanitized_state.cdclk = icl_calc_cdclk(0, sanitized_state.ref); sanitized_state.vco = icl_calc_cdclk_pll_vco(dev_priv, sanitized_state.cdclk); - sanitized_state.voltage_level = - icl_calc_voltage_level(dev_priv, - sanitized_state.cdclk); + if (IS_ELKHARTLAKE(dev_priv)) + sanitized_state.voltage_level = + ehl_calc_voltage_level(sanitized_state.cdclk); + else + sanitized_state.voltage_level = + icl_calc_voltage_level(sanitized_state.cdclk); cnl_set_cdclk(dev_priv, &sanitized_state, INVALID_PIPE); } @@ -1959,8 +1901,12 @@ static void icl_uninit_cdclk(struct drm_i915_private *dev_priv) cdclk_state.cdclk = cdclk_state.bypass; cdclk_state.vco = 0; - cdclk_state.voltage_level = icl_calc_voltage_level(dev_priv, - cdclk_state.cdclk); + if (IS_ELKHARTLAKE(dev_priv)) + cdclk_state.voltage_level = + ehl_calc_voltage_level(cdclk_state.cdclk); + else + cdclk_state.voltage_level = + icl_calc_voltage_level(cdclk_state.cdclk); cnl_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); } @@ -2561,9 +2507,14 @@ static int icl_modeset_calc_cdclk(struct intel_atomic_state *state) state->cdclk.logical.vco = vco; state->cdclk.logical.cdclk = cdclk; - state->cdclk.logical.voltage_level = - max(icl_calc_voltage_level(dev_priv, cdclk), - cnl_compute_min_voltage_level(state)); + if (IS_ELKHARTLAKE(dev_priv)) + state->cdclk.logical.voltage_level = + max(ehl_calc_voltage_level(cdclk), + cnl_compute_min_voltage_level(state)); + else + state->cdclk.logical.voltage_level = + max(icl_calc_voltage_level(cdclk), + cnl_compute_min_voltage_level(state)); if (!state->active_pipes) { cdclk = icl_calc_cdclk(state->cdclk.force_min_cdclk, ref); @@ -2571,8 +2522,12 @@ static int icl_modeset_calc_cdclk(struct intel_atomic_state *state) state->cdclk.actual.vco = vco; state->cdclk.actual.cdclk = cdclk; - state->cdclk.actual.voltage_level = - icl_calc_voltage_level(dev_priv, cdclk); + if (IS_ELKHARTLAKE(dev_priv)) + state->cdclk.actual.voltage_level = + ehl_calc_voltage_level(cdclk); + else + state->cdclk.actual.voltage_level = + icl_calc_voltage_level(cdclk); } else { state->cdclk.actual = state->cdclk.logical; } @@ -2819,11 +2774,7 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) dev_priv->display.modeset_calc_cdclk = vlv_modeset_calc_cdclk; } - if (INTEL_GEN(dev_priv) >= 11) - dev_priv->display.get_cdclk = icl_get_cdclk; - else if (IS_CANNONLAKE(dev_priv)) - dev_priv->display.get_cdclk = cnl_get_cdclk; - else if (IS_GEN9_LP(dev_priv)) + if (INTEL_GEN(dev_priv) >= 10 || IS_GEN9_LP(dev_priv)) dev_priv->display.get_cdclk = bxt_get_cdclk; else if (IS_GEN9_BC(dev_priv)) dev_priv->display.get_cdclk = skl_get_cdclk; From 736da8112fee663deed6efc53333c37dc0b04696 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 10 Sep 2019 09:15:06 -0700 Subject: [PATCH 131/331] drm/i915: Use literal representation of cdclk tables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bspec lays out legal cdclk frequencies, PLL ratios, and CD2X dividers in an easy-to-read table for most recent platforms. We've been translating the data from that table into platform-specific code logic, but it's easy to overlook an area we need to update when adding new cdclk values or enabling new platforms. Let's just add a form of the bspec table to the code and then adjust our functions to pull what they need directly out of the table. v2: Fix comparison when finding best cdclk. v3: Another logic fix for calc_cdclk. v4: - Use named initializers for cdclk tables. (Ville) - Include refclk as a field in the table instead of adding all three ratios for each entry. (Ville) - Terminate tables with an empty entry to avoid needing to store the table size. (Ville) - Don't try so hard to return reasonable values from our lookup functions if we get impossible inputs; just WARN and return 0. (Ville) - Keep a bxt_ prefix on the lookup functions since they're still only used on bxt+ for now. We can rename them later if we extend this table-based approach back to older platforms. (Ville) v5: - Fix cnl table's ratios for 24mhz refclk. (Ville) - Don't miss the named initializers on the cnl table. (Ville) - Represent refclk in table as u16 rather than u32. (Ville) Cc: Ville Syrjälä Signed-off-by: Matt Roper Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190910161506.7158-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/display/intel_cdclk.c | 303 +++++++-------------- drivers/gpu/drm/i915/display/intel_cdclk.h | 7 + drivers/gpu/drm/i915/i915_drv.h | 3 + 3 files changed, 110 insertions(+), 203 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index dfcb1cc58951e..f3431b530966a 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -1161,28 +1161,88 @@ static void skl_uninit_cdclk(struct drm_i915_private *dev_priv) skl_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); } -static int bxt_calc_cdclk(int min_cdclk) -{ - if (min_cdclk > 576000) - return 624000; - else if (min_cdclk > 384000) - return 576000; - else if (min_cdclk > 288000) - return 384000; - else if (min_cdclk > 144000) - return 288000; - else - return 144000; +static const struct intel_cdclk_vals bxt_cdclk_table[] = { + { .refclk = 19200, .cdclk = 144000, .divider = 8, .ratio = 60 }, + { .refclk = 19200, .cdclk = 288000, .divider = 4, .ratio = 60 }, + { .refclk = 19200, .cdclk = 384000, .divider = 3, .ratio = 60 }, + { .refclk = 19200, .cdclk = 576000, .divider = 2, .ratio = 60 }, + { .refclk = 19200, .cdclk = 624000, .divider = 2, .ratio = 65 }, + {} +}; + +static const struct intel_cdclk_vals glk_cdclk_table[] = { + { .refclk = 19200, .cdclk = 79200, .divider = 8, .ratio = 33 }, + { .refclk = 19200, .cdclk = 158400, .divider = 4, .ratio = 33 }, + { .refclk = 19200, .cdclk = 316800, .divider = 2, .ratio = 33 }, + {} +}; + +static const struct intel_cdclk_vals cnl_cdclk_table[] = { + { .refclk = 19200, .cdclk = 168000, .divider = 4, .ratio = 35 }, + { .refclk = 19200, .cdclk = 336000, .divider = 2, .ratio = 35 }, + { .refclk = 19200, .cdclk = 528000, .divider = 2, .ratio = 55 }, + + { .refclk = 24000, .cdclk = 168000, .divider = 4, .ratio = 28 }, + { .refclk = 24000, .cdclk = 336000, .divider = 2, .ratio = 28 }, + { .refclk = 24000, .cdclk = 528000, .divider = 2, .ratio = 44 }, + {} +}; + +static const struct intel_cdclk_vals icl_cdclk_table[] = { + { .refclk = 19200, .cdclk = 172800, .divider = 2, .ratio = 18 }, + { .refclk = 19200, .cdclk = 192000, .divider = 2, .ratio = 20 }, + { .refclk = 19200, .cdclk = 307200, .divider = 2, .ratio = 32 }, + { .refclk = 19200, .cdclk = 326400, .divider = 4, .ratio = 68 }, + { .refclk = 19200, .cdclk = 556800, .divider = 2, .ratio = 58 }, + { .refclk = 19200, .cdclk = 652800, .divider = 2, .ratio = 68 }, + + { .refclk = 24000, .cdclk = 180000, .divider = 2, .ratio = 15 }, + { .refclk = 24000, .cdclk = 192000, .divider = 2, .ratio = 16 }, + { .refclk = 24000, .cdclk = 312000, .divider = 2, .ratio = 26 }, + { .refclk = 24000, .cdclk = 324000, .divider = 4, .ratio = 54 }, + { .refclk = 24000, .cdclk = 552000, .divider = 2, .ratio = 46 }, + { .refclk = 24000, .cdclk = 648000, .divider = 2, .ratio = 54 }, + + { .refclk = 38400, .cdclk = 172800, .divider = 2, .ratio = 9 }, + { .refclk = 38400, .cdclk = 192000, .divider = 2, .ratio = 10 }, + { .refclk = 38400, .cdclk = 307200, .divider = 2, .ratio = 16 }, + { .refclk = 38400, .cdclk = 326400, .divider = 4, .ratio = 34 }, + { .refclk = 38400, .cdclk = 556800, .divider = 2, .ratio = 29 }, + { .refclk = 38400, .cdclk = 652800, .divider = 2, .ratio = 34 }, + {} +}; + +static int bxt_calc_cdclk(struct drm_i915_private *dev_priv, int min_cdclk) +{ + const struct intel_cdclk_vals *table = dev_priv->cdclk.table; + int i; + + for (i = 0; table[i].refclk; i++) + if (table[i].refclk == dev_priv->cdclk.hw.ref && + table[i].cdclk >= min_cdclk) + return table[i].cdclk; + + WARN(1, "Cannot satisfy minimum cdclk %d with refclk %u\n", + min_cdclk, dev_priv->cdclk.hw.ref); + return 0; } -static int glk_calc_cdclk(int min_cdclk) +static int bxt_calc_cdclk_pll_vco(struct drm_i915_private *dev_priv, int cdclk) { - if (min_cdclk > 158400) - return 316800; - else if (min_cdclk > 79200) - return 158400; - else - return 79200; + const struct intel_cdclk_vals *table = dev_priv->cdclk.table; + int i; + + if (cdclk == dev_priv->cdclk.hw.bypass) + return 0; + + for (i = 0; table[i].refclk; i++) + if (table[i].refclk == dev_priv->cdclk.hw.ref && + table[i].cdclk == cdclk) + return dev_priv->cdclk.hw.ref * table[i].ratio; + + WARN(1, "cdclk %d not valid for refclk %u\n", + cdclk, dev_priv->cdclk.hw.ref); + return 0; } static u8 bxt_calc_voltage_level(int cdclk) @@ -1220,52 +1280,6 @@ static u8 ehl_calc_voltage_level(int cdclk) return 0; } -static int bxt_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) -{ - int ratio; - - if (cdclk == dev_priv->cdclk.hw.bypass) - return 0; - - switch (cdclk) { - default: - MISSING_CASE(cdclk); - /* fall through */ - case 144000: - case 288000: - case 384000: - case 576000: - ratio = 60; - break; - case 624000: - ratio = 65; - break; - } - - return dev_priv->cdclk.hw.ref * ratio; -} - -static int glk_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) -{ - int ratio; - - if (cdclk == dev_priv->cdclk.hw.bypass) - return 0; - - switch (cdclk) { - default: - MISSING_CASE(cdclk); - /* fall through */ - case 79200: - case 158400: - case 316800: - ratio = 33; - break; - } - - return dev_priv->cdclk.hw.ref * ratio; -} - static void cnl_readout_refclk(struct drm_i915_private *dev_priv, struct intel_cdclk_state *cdclk_state) { @@ -1576,13 +1590,8 @@ static void bxt_init_cdclk(struct drm_i915_private *dev_priv) * - The initial CDCLK needs to be read from VBT. * Need to make this change after VBT has changes for BXT. */ - if (IS_GEMINILAKE(dev_priv)) { - cdclk_state.cdclk = glk_calc_cdclk(0); - cdclk_state.vco = glk_de_pll_vco(dev_priv, cdclk_state.cdclk); - } else { - cdclk_state.cdclk = bxt_calc_cdclk(0); - cdclk_state.vco = bxt_de_pll_vco(dev_priv, cdclk_state.cdclk); - } + cdclk_state.cdclk = bxt_calc_cdclk(dev_priv, 0); + cdclk_state.vco = bxt_calc_cdclk_pll_vco(dev_priv, cdclk_state.cdclk); cdclk_state.voltage_level = bxt_calc_voltage_level(cdclk_state.cdclk); bxt_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); @@ -1599,16 +1608,6 @@ static void bxt_uninit_cdclk(struct drm_i915_private *dev_priv) bxt_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); } -static int cnl_calc_cdclk(int min_cdclk) -{ - if (min_cdclk > 336000) - return 528000; - else if (min_cdclk > 168000) - return 336000; - else - return 168000; -} - static void cnl_cdclk_pll_disable(struct drm_i915_private *dev_priv) { u32 val; @@ -1718,29 +1717,6 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv, dev_priv->cdclk.hw.voltage_level = cdclk_state->voltage_level; } -static int cnl_cdclk_pll_vco(struct drm_i915_private *dev_priv, int cdclk) -{ - int ratio; - - if (cdclk == dev_priv->cdclk.hw.bypass) - return 0; - - switch (cdclk) { - default: - MISSING_CASE(cdclk); - /* fall through */ - case 168000: - case 336000: - ratio = dev_priv->cdclk.hw.ref == 19200 ? 35 : 28; - break; - case 528000: - ratio = dev_priv->cdclk.hw.ref == 19200 ? 55 : 44; - break; - } - - return dev_priv->cdclk.hw.ref * ratio; -} - static void cnl_sanitize_cdclk(struct drm_i915_private *dev_priv) { u32 cdctl, expected; @@ -1783,77 +1759,6 @@ static void cnl_sanitize_cdclk(struct drm_i915_private *dev_priv) dev_priv->cdclk.hw.vco = -1; } -static int icl_calc_cdclk(int min_cdclk, unsigned int ref) -{ - static const int ranges_24[] = { 180000, 192000, 312000, 324000, - 552000, 648000 }; - static const int ranges_19_38[] = { 172800, 192000, 307200, 326400, - 556800, 652800 }; - const int *ranges; - int len, i; - - switch (ref) { - default: - MISSING_CASE(ref); - /* fall through */ - case 24000: - ranges = ranges_24; - len = ARRAY_SIZE(ranges_24); - break; - case 19200: - case 38400: - ranges = ranges_19_38; - len = ARRAY_SIZE(ranges_19_38); - break; - } - - for (i = 0; i < len; i++) { - if (min_cdclk <= ranges[i]) - return ranges[i]; - } - - WARN_ON(min_cdclk > ranges[len - 1]); - return ranges[len - 1]; -} - -static int icl_calc_cdclk_pll_vco(struct drm_i915_private *dev_priv, int cdclk) -{ - int ratio; - - if (cdclk == dev_priv->cdclk.hw.bypass) - return 0; - - switch (cdclk) { - default: - MISSING_CASE(cdclk); - /* fall through */ - case 172800: - case 307200: - case 326400: - case 556800: - case 652800: - WARN_ON(dev_priv->cdclk.hw.ref != 19200 && - dev_priv->cdclk.hw.ref != 38400); - break; - case 180000: - case 312000: - case 324000: - case 552000: - case 648000: - WARN_ON(dev_priv->cdclk.hw.ref != 24000); - break; - case 192000: - WARN_ON(dev_priv->cdclk.hw.ref != 19200 && - dev_priv->cdclk.hw.ref != 38400 && - dev_priv->cdclk.hw.ref != 24000); - break; - } - - ratio = cdclk / (dev_priv->cdclk.hw.ref / 2); - - return dev_priv->cdclk.hw.ref * ratio; -} - static void icl_init_cdclk(struct drm_i915_private *dev_priv) { struct intel_cdclk_state sanitized_state; @@ -1882,8 +1787,8 @@ static void icl_init_cdclk(struct drm_i915_private *dev_priv) DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n"); sanitized_state.ref = dev_priv->cdclk.hw.ref; - sanitized_state.cdclk = icl_calc_cdclk(0, sanitized_state.ref); - sanitized_state.vco = icl_calc_cdclk_pll_vco(dev_priv, + sanitized_state.cdclk = bxt_calc_cdclk(dev_priv, 0); + sanitized_state.vco = bxt_calc_cdclk_pll_vco(dev_priv, sanitized_state.cdclk); if (IS_ELKHARTLAKE(dev_priv)) sanitized_state.voltage_level = @@ -1923,8 +1828,8 @@ static void cnl_init_cdclk(struct drm_i915_private *dev_priv) cdclk_state = dev_priv->cdclk.hw; - cdclk_state.cdclk = cnl_calc_cdclk(0); - cdclk_state.vco = cnl_cdclk_pll_vco(dev_priv, cdclk_state.cdclk); + cdclk_state.cdclk = bxt_calc_cdclk(dev_priv, 0); + cdclk_state.vco = bxt_calc_cdclk_pll_vco(dev_priv, cdclk_state.cdclk); cdclk_state.voltage_level = cnl_calc_voltage_level(cdclk_state.cdclk); cnl_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); @@ -2426,13 +2331,8 @@ static int bxt_modeset_calc_cdclk(struct intel_atomic_state *state) if (min_cdclk < 0) return min_cdclk; - if (IS_GEMINILAKE(dev_priv)) { - cdclk = glk_calc_cdclk(min_cdclk); - vco = glk_de_pll_vco(dev_priv, cdclk); - } else { - cdclk = bxt_calc_cdclk(min_cdclk); - vco = bxt_de_pll_vco(dev_priv, cdclk); - } + cdclk = bxt_calc_cdclk(dev_priv, min_cdclk); + vco = bxt_calc_cdclk_pll_vco(dev_priv, cdclk); state->cdclk.logical.vco = vco; state->cdclk.logical.cdclk = cdclk; @@ -2440,13 +2340,8 @@ static int bxt_modeset_calc_cdclk(struct intel_atomic_state *state) bxt_calc_voltage_level(cdclk); if (!state->active_pipes) { - if (IS_GEMINILAKE(dev_priv)) { - cdclk = glk_calc_cdclk(state->cdclk.force_min_cdclk); - vco = glk_de_pll_vco(dev_priv, cdclk); - } else { - cdclk = bxt_calc_cdclk(state->cdclk.force_min_cdclk); - vco = bxt_de_pll_vco(dev_priv, cdclk); - } + cdclk = bxt_calc_cdclk(dev_priv, state->cdclk.force_min_cdclk); + vco = bxt_calc_cdclk_pll_vco(dev_priv, cdclk); state->cdclk.actual.vco = vco; state->cdclk.actual.cdclk = cdclk; @@ -2468,8 +2363,8 @@ static int cnl_modeset_calc_cdclk(struct intel_atomic_state *state) if (min_cdclk < 0) return min_cdclk; - cdclk = cnl_calc_cdclk(min_cdclk); - vco = cnl_cdclk_pll_vco(dev_priv, cdclk); + cdclk = bxt_calc_cdclk(dev_priv, min_cdclk); + vco = bxt_calc_cdclk_pll_vco(dev_priv, cdclk); state->cdclk.logical.vco = vco; state->cdclk.logical.cdclk = cdclk; @@ -2478,8 +2373,8 @@ static int cnl_modeset_calc_cdclk(struct intel_atomic_state *state) cnl_compute_min_voltage_level(state)); if (!state->active_pipes) { - cdclk = cnl_calc_cdclk(state->cdclk.force_min_cdclk); - vco = cnl_cdclk_pll_vco(dev_priv, cdclk); + cdclk = bxt_calc_cdclk(dev_priv, state->cdclk.force_min_cdclk); + vco = bxt_calc_cdclk_pll_vco(dev_priv, cdclk); state->cdclk.actual.vco = vco; state->cdclk.actual.cdclk = cdclk; @@ -2495,15 +2390,14 @@ static int cnl_modeset_calc_cdclk(struct intel_atomic_state *state) static int icl_modeset_calc_cdclk(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); - unsigned int ref = state->cdclk.logical.ref; int min_cdclk, cdclk, vco; min_cdclk = intel_compute_min_cdclk(state); if (min_cdclk < 0) return min_cdclk; - cdclk = icl_calc_cdclk(min_cdclk, ref); - vco = icl_calc_cdclk_pll_vco(dev_priv, cdclk); + cdclk = bxt_calc_cdclk(dev_priv, min_cdclk); + vco = bxt_calc_cdclk_pll_vco(dev_priv, cdclk); state->cdclk.logical.vco = vco; state->cdclk.logical.cdclk = cdclk; @@ -2517,8 +2411,8 @@ static int icl_modeset_calc_cdclk(struct intel_atomic_state *state) cnl_compute_min_voltage_level(state)); if (!state->active_pipes) { - cdclk = icl_calc_cdclk(state->cdclk.force_min_cdclk, ref); - vco = icl_calc_cdclk_pll_vco(dev_priv, cdclk); + cdclk = bxt_calc_cdclk(dev_priv, state->cdclk.force_min_cdclk); + vco = bxt_calc_cdclk_pll_vco(dev_priv, cdclk); state->cdclk.actual.vco = vco; state->cdclk.actual.cdclk = cdclk; @@ -2754,12 +2648,15 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) if (INTEL_GEN(dev_priv) >= 11) { dev_priv->display.set_cdclk = cnl_set_cdclk; dev_priv->display.modeset_calc_cdclk = icl_modeset_calc_cdclk; + dev_priv->cdclk.table = icl_cdclk_table; } else if (IS_CANNONLAKE(dev_priv)) { dev_priv->display.set_cdclk = cnl_set_cdclk; dev_priv->display.modeset_calc_cdclk = cnl_modeset_calc_cdclk; + dev_priv->cdclk.table = cnl_cdclk_table; } else if (IS_GEN9_LP(dev_priv)) { dev_priv->display.set_cdclk = bxt_set_cdclk; dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk; + dev_priv->cdclk.table = bxt_cdclk_table; } else if (IS_GEN9_BC(dev_priv)) { dev_priv->display.set_cdclk = skl_set_cdclk; dev_priv->display.modeset_calc_cdclk = skl_modeset_calc_cdclk; diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.h b/drivers/gpu/drm/i915/display/intel_cdclk.h index 4d6f7f5f89300..1afa84ab60186 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.h +++ b/drivers/gpu/drm/i915/display/intel_cdclk.h @@ -15,6 +15,13 @@ struct intel_atomic_state; struct intel_cdclk_state; struct intel_crtc_state; +struct intel_cdclk_vals { + u16 refclk; + u32 cdclk; + u8 divider; /* CD2X divider * 2 */ + u8 ratio; +}; + int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state); void intel_cdclk_init(struct drm_i915_private *i915); void intel_cdclk_uninit(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e289b4ffd34bb..ff6aff2a48663 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1420,6 +1420,9 @@ struct drm_i915_private { /* The current hardware cdclk state */ struct intel_cdclk_state hw; + /* cdclk, divider, and ratio table from bspec */ + const struct intel_cdclk_vals *table; + int force_min_cdclk; } cdclk; From 1cbcd3b4b168a08620153d2c5d52461a75338349 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 10 Sep 2019 08:42:47 -0700 Subject: [PATCH 132/331] drm/i915: Combine bxt_set_cdclk and cnl_set_cdclk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We'd previously combined ICL/TGL logic into the cnl_set_cdclk function, but BXT is pretty similar as well. Roll the cnl/icl/tgl logic back into the bxt function; the only things we really need to handle separately are punit notification and calling different functions to enable/disable the cdclk PLL. Cc: Ville Syrjälä Signed-off-by: Matt Roper Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190910154252.30503-4-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/display/intel_cdclk.c | 267 +++++++++------------ 1 file changed, 119 insertions(+), 148 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index f3431b530966a..1dcb8a113b4f5 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -1440,6 +1440,39 @@ static void bxt_de_pll_enable(struct drm_i915_private *dev_priv, int vco) dev_priv->cdclk.hw.vco = vco; } +static void cnl_cdclk_pll_disable(struct drm_i915_private *dev_priv) +{ + u32 val; + + val = I915_READ(BXT_DE_PLL_ENABLE); + val &= ~BXT_DE_PLL_PLL_ENABLE; + I915_WRITE(BXT_DE_PLL_ENABLE, val); + + /* Timeout 200us */ + if (wait_for((I915_READ(BXT_DE_PLL_ENABLE) & BXT_DE_PLL_LOCK) == 0, 1)) + DRM_ERROR("timeout waiting for CDCLK PLL unlock\n"); + + dev_priv->cdclk.hw.vco = 0; +} + +static void cnl_cdclk_pll_enable(struct drm_i915_private *dev_priv, int vco) +{ + int ratio = DIV_ROUND_CLOSEST(vco, dev_priv->cdclk.hw.ref); + u32 val; + + val = CNL_CDCLK_PLL_RATIO(ratio); + I915_WRITE(BXT_DE_PLL_ENABLE, val); + + val |= BXT_DE_PLL_PLL_ENABLE; + I915_WRITE(BXT_DE_PLL_ENABLE, val); + + /* Timeout 200us */ + if (wait_for((I915_READ(BXT_DE_PLL_ENABLE) & BXT_DE_PLL_LOCK) != 0, 1)) + DRM_ERROR("timeout waiting for CDCLK PLL lock\n"); + + dev_priv->cdclk.hw.vco = vco; +} + static void bxt_set_cdclk(struct drm_i915_private *dev_priv, const struct intel_cdclk_state *cdclk_state, enum pipe pipe) @@ -1449,6 +1482,27 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, u32 val, divider; int ret; + /* Inform power controller of upcoming frequency change. */ + if (INTEL_GEN(dev_priv) >= 10) + ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, + SKL_CDCLK_PREPARE_FOR_CHANGE, + SKL_CDCLK_READY_FOR_CHANGE, + SKL_CDCLK_READY_FOR_CHANGE, 3); + else + /* + * BSpec requires us to wait up to 150usec, but that leads to + * timeouts; the 2ms used here is based on experiment. + */ + ret = sandybridge_pcode_write_timeout(dev_priv, + HSW_PCODE_DE_WRITE_FREQ_REQ, + 0x80000000, 150, 2); + + if (ret) { + DRM_ERROR("Failed to inform PCU about cdclk change (err %d, freq %d)\n", + ret, cdclk); + return; + } + /* cdclk = vco / 2 / div{1,1.5,2,4} */ switch (DIV_ROUND_CLOSEST(vco, cdclk)) { default: @@ -1459,63 +1513,82 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, divider = BXT_CDCLK_CD2X_DIV_SEL_1; break; case 3: - WARN(IS_GEMINILAKE(dev_priv), "Unsupported divider\n"); + WARN(IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 10, + "Unsupported divider\n"); divider = BXT_CDCLK_CD2X_DIV_SEL_1_5; break; case 4: divider = BXT_CDCLK_CD2X_DIV_SEL_2; break; case 8: + WARN(INTEL_GEN(dev_priv) >= 10, "Unsupported divider\n"); divider = BXT_CDCLK_CD2X_DIV_SEL_4; break; } - /* - * Inform power controller of upcoming frequency change. BSpec - * requires us to wait up to 150usec, but that leads to timeouts; - * the 2ms used here is based on experiment. - */ - ret = sandybridge_pcode_write_timeout(dev_priv, - HSW_PCODE_DE_WRITE_FREQ_REQ, - 0x80000000, 150, 2); - if (ret) { - DRM_ERROR("PCode CDCLK freq change notify failed (err %d, freq %d)\n", - ret, cdclk); - return; - } + if (INTEL_GEN(dev_priv) >= 10) { + if (dev_priv->cdclk.hw.vco != 0 && + dev_priv->cdclk.hw.vco != vco) + cnl_cdclk_pll_disable(dev_priv); - if (dev_priv->cdclk.hw.vco != 0 && - dev_priv->cdclk.hw.vco != vco) - bxt_de_pll_disable(dev_priv); + if (dev_priv->cdclk.hw.vco != vco) + cnl_cdclk_pll_enable(dev_priv, vco); - if (dev_priv->cdclk.hw.vco != vco) - bxt_de_pll_enable(dev_priv, vco); + } else { + if (dev_priv->cdclk.hw.vco != 0 && + dev_priv->cdclk.hw.vco != vco) + bxt_de_pll_disable(dev_priv); + + if (dev_priv->cdclk.hw.vco != vco) + bxt_de_pll_enable(dev_priv, vco); + } val = divider | skl_cdclk_decimal(cdclk); - if (pipe == INVALID_PIPE) - val |= BXT_CDCLK_CD2X_PIPE_NONE; - else - val |= BXT_CDCLK_CD2X_PIPE(pipe); + + if (INTEL_GEN(dev_priv) >= 12) { + if (pipe == INVALID_PIPE) + val |= TGL_CDCLK_CD2X_PIPE_NONE; + else + val |= TGL_CDCLK_CD2X_PIPE(pipe); + } else if (INTEL_GEN(dev_priv) >= 11) { + if (pipe == INVALID_PIPE) + val |= ICL_CDCLK_CD2X_PIPE_NONE; + else + val |= ICL_CDCLK_CD2X_PIPE(pipe); + } else { + if (pipe == INVALID_PIPE) + val |= BXT_CDCLK_CD2X_PIPE_NONE; + else + val |= BXT_CDCLK_CD2X_PIPE(pipe); + } + /* * Disable SSA Precharge when CD clock frequency < 500 MHz, * enable otherwise. */ - if (cdclk >= 500000) + if (IS_GEN9_LP(dev_priv) && cdclk >= 500000) val |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; I915_WRITE(CDCLK_CTL, val); if (pipe != INVALID_PIPE) intel_wait_for_vblank(dev_priv, pipe); - /* - * The timeout isn't specified, the 2ms used here is based on - * experiment. - * FIXME: Waiting for the request completion could be delayed until - * the next PCODE request based on BSpec. - */ - ret = sandybridge_pcode_write_timeout(dev_priv, - HSW_PCODE_DE_WRITE_FREQ_REQ, - cdclk_state->voltage_level, 150, 2); + if (INTEL_GEN(dev_priv) >= 10) { + ret = sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, + cdclk_state->voltage_level); + } else { + /* + * The timeout isn't specified, the 2ms used here is based on + * experiment. + * FIXME: Waiting for the request completion could be delayed + * until the next PCODE request based on BSpec. + */ + ret = sandybridge_pcode_write_timeout(dev_priv, + HSW_PCODE_DE_WRITE_FREQ_REQ, + cdclk_state->voltage_level, + 150, 2); + } + if (ret) { DRM_ERROR("PCode CDCLK freq set failed, (err %d, freq %d)\n", ret, cdclk); @@ -1523,6 +1596,13 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, } intel_update_cdclk(dev_priv); + + if (INTEL_GEN(dev_priv) >= 10) + /* + * Can't read out the voltage level :( + * Let's just assume everything is as expected. + */ + dev_priv->cdclk.hw.voltage_level = cdclk_state->voltage_level; } static void bxt_sanitize_cdclk(struct drm_i915_private *dev_priv) @@ -1608,115 +1688,6 @@ static void bxt_uninit_cdclk(struct drm_i915_private *dev_priv) bxt_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); } -static void cnl_cdclk_pll_disable(struct drm_i915_private *dev_priv) -{ - u32 val; - - val = I915_READ(BXT_DE_PLL_ENABLE); - val &= ~BXT_DE_PLL_PLL_ENABLE; - I915_WRITE(BXT_DE_PLL_ENABLE, val); - - /* Timeout 200us */ - if (wait_for((I915_READ(BXT_DE_PLL_ENABLE) & BXT_DE_PLL_LOCK) == 0, 1)) - DRM_ERROR("timeout waiting for CDCLK PLL unlock\n"); - - dev_priv->cdclk.hw.vco = 0; -} - -static void cnl_cdclk_pll_enable(struct drm_i915_private *dev_priv, int vco) -{ - int ratio = DIV_ROUND_CLOSEST(vco, dev_priv->cdclk.hw.ref); - u32 val; - - val = CNL_CDCLK_PLL_RATIO(ratio); - I915_WRITE(BXT_DE_PLL_ENABLE, val); - - val |= BXT_DE_PLL_PLL_ENABLE; - I915_WRITE(BXT_DE_PLL_ENABLE, val); - - /* Timeout 200us */ - if (wait_for((I915_READ(BXT_DE_PLL_ENABLE) & BXT_DE_PLL_LOCK) != 0, 1)) - DRM_ERROR("timeout waiting for CDCLK PLL lock\n"); - - dev_priv->cdclk.hw.vco = vco; -} - -static void cnl_set_cdclk(struct drm_i915_private *dev_priv, - const struct intel_cdclk_state *cdclk_state, - enum pipe pipe) -{ - int cdclk = cdclk_state->cdclk; - int vco = cdclk_state->vco; - u32 val, divider; - int ret; - - ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, - SKL_CDCLK_PREPARE_FOR_CHANGE, - SKL_CDCLK_READY_FOR_CHANGE, - SKL_CDCLK_READY_FOR_CHANGE, 3); - if (ret) { - DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", - ret); - return; - } - - /* cdclk = vco / 2 / div{1,2} */ - switch (DIV_ROUND_CLOSEST(vco, cdclk)) { - default: - WARN_ON(cdclk != dev_priv->cdclk.hw.bypass); - WARN_ON(vco != 0); - /* fall through */ - case 2: - divider = BXT_CDCLK_CD2X_DIV_SEL_1; - break; - case 4: - divider = BXT_CDCLK_CD2X_DIV_SEL_2; - break; - } - - if (dev_priv->cdclk.hw.vco != 0 && - dev_priv->cdclk.hw.vco != vco) - cnl_cdclk_pll_disable(dev_priv); - - if (dev_priv->cdclk.hw.vco != vco) - cnl_cdclk_pll_enable(dev_priv, vco); - - val = divider | skl_cdclk_decimal(cdclk); - - if (INTEL_GEN(dev_priv) >= 12) { - if (pipe == INVALID_PIPE) - val |= TGL_CDCLK_CD2X_PIPE_NONE; - else - val |= TGL_CDCLK_CD2X_PIPE(pipe); - } else if (INTEL_GEN(dev_priv) >= 11) { - if (pipe == INVALID_PIPE) - val |= ICL_CDCLK_CD2X_PIPE_NONE; - else - val |= ICL_CDCLK_CD2X_PIPE(pipe); - } else { - if (pipe == INVALID_PIPE) - val |= BXT_CDCLK_CD2X_PIPE_NONE; - else - val |= BXT_CDCLK_CD2X_PIPE(pipe); - } - I915_WRITE(CDCLK_CTL, val); - - if (pipe != INVALID_PIPE) - intel_wait_for_vblank(dev_priv, pipe); - - /* inform PCU of the change */ - sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, - cdclk_state->voltage_level); - - intel_update_cdclk(dev_priv); - - /* - * Can't read out the voltage level :( - * Let's just assume everything is as expected. - */ - dev_priv->cdclk.hw.voltage_level = cdclk_state->voltage_level; -} - static void cnl_sanitize_cdclk(struct drm_i915_private *dev_priv) { u32 cdctl, expected; @@ -1797,7 +1768,7 @@ static void icl_init_cdclk(struct drm_i915_private *dev_priv) sanitized_state.voltage_level = icl_calc_voltage_level(sanitized_state.cdclk); - cnl_set_cdclk(dev_priv, &sanitized_state, INVALID_PIPE); + bxt_set_cdclk(dev_priv, &sanitized_state, INVALID_PIPE); } static void icl_uninit_cdclk(struct drm_i915_private *dev_priv) @@ -1813,7 +1784,7 @@ static void icl_uninit_cdclk(struct drm_i915_private *dev_priv) cdclk_state.voltage_level = icl_calc_voltage_level(cdclk_state.cdclk); - cnl_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); + bxt_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); } static void cnl_init_cdclk(struct drm_i915_private *dev_priv) @@ -1832,7 +1803,7 @@ static void cnl_init_cdclk(struct drm_i915_private *dev_priv) cdclk_state.vco = bxt_calc_cdclk_pll_vco(dev_priv, cdclk_state.cdclk); cdclk_state.voltage_level = cnl_calc_voltage_level(cdclk_state.cdclk); - cnl_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); + bxt_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); } static void cnl_uninit_cdclk(struct drm_i915_private *dev_priv) @@ -1843,7 +1814,7 @@ static void cnl_uninit_cdclk(struct drm_i915_private *dev_priv) cdclk_state.vco = 0; cdclk_state.voltage_level = cnl_calc_voltage_level(cdclk_state.cdclk); - cnl_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); + bxt_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); } /** @@ -2646,11 +2617,11 @@ void intel_update_rawclk(struct drm_i915_private *dev_priv) void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) { if (INTEL_GEN(dev_priv) >= 11) { - dev_priv->display.set_cdclk = cnl_set_cdclk; + dev_priv->display.set_cdclk = bxt_set_cdclk; dev_priv->display.modeset_calc_cdclk = icl_modeset_calc_cdclk; dev_priv->cdclk.table = icl_cdclk_table; } else if (IS_CANNONLAKE(dev_priv)) { - dev_priv->display.set_cdclk = cnl_set_cdclk; + dev_priv->display.set_cdclk = bxt_set_cdclk; dev_priv->display.modeset_calc_cdclk = cnl_modeset_calc_cdclk; dev_priv->cdclk.table = cnl_cdclk_table; } else if (IS_GEN9_LP(dev_priv)) { From 5dac256bf76793d4e4c554a3c6784e39ccf06664 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 10 Sep 2019 08:42:48 -0700 Subject: [PATCH 133/331] drm/i915: Kill cnl_sanitize_cdclk() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The CNL variant of this function is identical to the BXT variant aside from not needing to handle SSA precharge. Cc: Ville Syrjälä Signed-off-by: Matt Roper Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190910154252.30503-5-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/display/intel_cdclk.c | 46 +--------------------- 1 file changed, 2 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 1dcb8a113b4f5..ebed87a64c98e 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -1636,7 +1636,7 @@ static void bxt_sanitize_cdclk(struct drm_i915_private *dev_priv) * Disable SSA Precharge when CD clock frequency < 500 MHz, * enable otherwise. */ - if (dev_priv->cdclk.hw.cdclk >= 500000) + if (IS_GEN9_LP(dev_priv) && dev_priv->cdclk.hw.cdclk >= 500000) expected |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; if (cdctl == expected) @@ -1688,48 +1688,6 @@ static void bxt_uninit_cdclk(struct drm_i915_private *dev_priv) bxt_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); } -static void cnl_sanitize_cdclk(struct drm_i915_private *dev_priv) -{ - u32 cdctl, expected; - - intel_update_cdclk(dev_priv); - intel_dump_cdclk_state(&dev_priv->cdclk.hw, "Current CDCLK"); - - if (dev_priv->cdclk.hw.vco == 0 || - dev_priv->cdclk.hw.cdclk == dev_priv->cdclk.hw.bypass) - goto sanitize; - - /* DPLL okay; verify the cdclock - * - * Some BIOS versions leave an incorrect decimal frequency value and - * set reserved MBZ bits in CDCLK_CTL at least during exiting from S4, - * so sanitize this register. - */ - cdctl = I915_READ(CDCLK_CTL); - /* - * Let's ignore the pipe field, since BIOS could have configured the - * dividers both synching to an active pipe, or asynchronously - * (PIPE_NONE). - */ - cdctl &= ~BXT_CDCLK_CD2X_PIPE_NONE; - - expected = (cdctl & BXT_CDCLK_CD2X_DIV_SEL_MASK) | - skl_cdclk_decimal(dev_priv->cdclk.hw.cdclk); - - if (cdctl == expected) - /* All well; nothing to sanitize */ - return; - -sanitize: - DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n"); - - /* force cdclk programming */ - dev_priv->cdclk.hw.cdclk = 0; - - /* force full PLL disable + enable */ - dev_priv->cdclk.hw.vco = -1; -} - static void icl_init_cdclk(struct drm_i915_private *dev_priv) { struct intel_cdclk_state sanitized_state; @@ -1791,7 +1749,7 @@ static void cnl_init_cdclk(struct drm_i915_private *dev_priv) { struct intel_cdclk_state cdclk_state; - cnl_sanitize_cdclk(dev_priv); + bxt_sanitize_cdclk(dev_priv); if (dev_priv->cdclk.hw.cdclk != 0 && dev_priv->cdclk.hw.vco != 0) From 751a93a15cde277ada4e1be14d93eb13a0dc3edd Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 10 Sep 2019 08:42:49 -0700 Subject: [PATCH 134/331] drm/i915: Consolidate {bxt,cnl,icl}_uninit_cdclk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The uninitialize flow is the same on all of these platforms, aside from calculating a different frequency level. v2: Reverse platform conditional order for consistency. (Ville) Cc: Ville Syrjälä Signed-off-by: Matt Roper Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190910154252.30503-6-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/display/intel_cdclk.c | 48 +++++++--------------- 1 file changed, 14 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index ebed87a64c98e..679aeebfbca51 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -1683,7 +1683,18 @@ static void bxt_uninit_cdclk(struct drm_i915_private *dev_priv) cdclk_state.cdclk = cdclk_state.bypass; cdclk_state.vco = 0; - cdclk_state.voltage_level = bxt_calc_voltage_level(cdclk_state.cdclk); + if (IS_ELKHARTLAKE(dev_priv)) + cdclk_state.voltage_level = + ehl_calc_voltage_level(cdclk_state.cdclk); + else if (INTEL_GEN(dev_priv) >= 11) + cdclk_state.voltage_level = + icl_calc_voltage_level(cdclk_state.cdclk); + else if (INTEL_GEN(dev_priv) >= 10) + cdclk_state.voltage_level = + cnl_calc_voltage_level(cdclk_state.cdclk); + else + cdclk_state.voltage_level = + bxt_calc_voltage_level(cdclk_state.cdclk); bxt_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); } @@ -1729,22 +1740,6 @@ static void icl_init_cdclk(struct drm_i915_private *dev_priv) bxt_set_cdclk(dev_priv, &sanitized_state, INVALID_PIPE); } -static void icl_uninit_cdclk(struct drm_i915_private *dev_priv) -{ - struct intel_cdclk_state cdclk_state = dev_priv->cdclk.hw; - - cdclk_state.cdclk = cdclk_state.bypass; - cdclk_state.vco = 0; - if (IS_ELKHARTLAKE(dev_priv)) - cdclk_state.voltage_level = - ehl_calc_voltage_level(cdclk_state.cdclk); - else - cdclk_state.voltage_level = - icl_calc_voltage_level(cdclk_state.cdclk); - - bxt_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); -} - static void cnl_init_cdclk(struct drm_i915_private *dev_priv) { struct intel_cdclk_state cdclk_state; @@ -1764,17 +1759,6 @@ static void cnl_init_cdclk(struct drm_i915_private *dev_priv) bxt_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); } -static void cnl_uninit_cdclk(struct drm_i915_private *dev_priv) -{ - struct intel_cdclk_state cdclk_state = dev_priv->cdclk.hw; - - cdclk_state.cdclk = cdclk_state.bypass; - cdclk_state.vco = 0; - cdclk_state.voltage_level = cnl_calc_voltage_level(cdclk_state.cdclk); - - bxt_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); -} - /** * intel_cdclk_init - Initialize CDCLK * @i915: i915 device @@ -1805,14 +1789,10 @@ void intel_cdclk_init(struct drm_i915_private *i915) */ void intel_cdclk_uninit(struct drm_i915_private *i915) { - if (INTEL_GEN(i915) >= 11) - icl_uninit_cdclk(i915); - else if (IS_CANNONLAKE(i915)) - cnl_uninit_cdclk(i915); + if (INTEL_GEN(i915) >= 10 || IS_GEN9_LP(i915)) + bxt_uninit_cdclk(i915); else if (IS_GEN9_BC(i915)) skl_uninit_cdclk(i915); - else if (IS_GEN9_LP(i915)) - bxt_uninit_cdclk(i915); } /** From d2f429ebb97725e9bec597aad2f0d2c7d203842f Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 10 Sep 2019 08:42:50 -0700 Subject: [PATCH 135/331] drm/i915: Add calc_voltage_level display vfunc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With all of the cdclk function consolidation, we can cut down on a lot of platform if/else logic by creating a vfunc that's initialized at startup. Cc: Ville Syrjälä Signed-off-by: Matt Roper Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190910154252.30503-7-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/display/intel_cdclk.c | 75 ++++++++-------------- drivers/gpu/drm/i915/i915_drv.h | 1 + 2 files changed, 27 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 679aeebfbca51..e1a4ac9bd4f0d 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -1394,18 +1394,8 @@ static void bxt_get_cdclk(struct drm_i915_private *dev_priv, * Can't read this out :( Let's assume it's * at least what the CDCLK frequency requires. */ - if (IS_ELKHARTLAKE(dev_priv)) - cdclk_state->voltage_level = - ehl_calc_voltage_level(cdclk_state->cdclk); - else if (INTEL_GEN(dev_priv) >= 11) - cdclk_state->voltage_level = - icl_calc_voltage_level(cdclk_state->cdclk); - else if (INTEL_GEN(dev_priv) >= 10) - cdclk_state->voltage_level = - cnl_calc_voltage_level(cdclk_state->cdclk); - else - cdclk_state->voltage_level = - bxt_calc_voltage_level(cdclk_state->cdclk); + cdclk_state->voltage_level = + dev_priv->display.calc_voltage_level(cdclk_state->cdclk); } static void bxt_de_pll_disable(struct drm_i915_private *dev_priv) @@ -1672,7 +1662,8 @@ static void bxt_init_cdclk(struct drm_i915_private *dev_priv) */ cdclk_state.cdclk = bxt_calc_cdclk(dev_priv, 0); cdclk_state.vco = bxt_calc_cdclk_pll_vco(dev_priv, cdclk_state.cdclk); - cdclk_state.voltage_level = bxt_calc_voltage_level(cdclk_state.cdclk); + cdclk_state.voltage_level = + dev_priv->display.calc_voltage_level(cdclk_state.cdclk); bxt_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); } @@ -1683,18 +1674,8 @@ static void bxt_uninit_cdclk(struct drm_i915_private *dev_priv) cdclk_state.cdclk = cdclk_state.bypass; cdclk_state.vco = 0; - if (IS_ELKHARTLAKE(dev_priv)) - cdclk_state.voltage_level = - ehl_calc_voltage_level(cdclk_state.cdclk); - else if (INTEL_GEN(dev_priv) >= 11) - cdclk_state.voltage_level = - icl_calc_voltage_level(cdclk_state.cdclk); - else if (INTEL_GEN(dev_priv) >= 10) - cdclk_state.voltage_level = - cnl_calc_voltage_level(cdclk_state.cdclk); - else - cdclk_state.voltage_level = - bxt_calc_voltage_level(cdclk_state.cdclk); + cdclk_state.voltage_level = + dev_priv->display.calc_voltage_level(cdclk_state.cdclk); bxt_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); } @@ -1730,12 +1711,8 @@ static void icl_init_cdclk(struct drm_i915_private *dev_priv) sanitized_state.cdclk = bxt_calc_cdclk(dev_priv, 0); sanitized_state.vco = bxt_calc_cdclk_pll_vco(dev_priv, sanitized_state.cdclk); - if (IS_ELKHARTLAKE(dev_priv)) - sanitized_state.voltage_level = - ehl_calc_voltage_level(sanitized_state.cdclk); - else - sanitized_state.voltage_level = - icl_calc_voltage_level(sanitized_state.cdclk); + sanitized_state.voltage_level = + dev_priv->display.calc_voltage_level(sanitized_state.cdclk); bxt_set_cdclk(dev_priv, &sanitized_state, INVALID_PIPE); } @@ -1754,7 +1731,8 @@ static void cnl_init_cdclk(struct drm_i915_private *dev_priv) cdclk_state.cdclk = bxt_calc_cdclk(dev_priv, 0); cdclk_state.vco = bxt_calc_cdclk_pll_vco(dev_priv, cdclk_state.cdclk); - cdclk_state.voltage_level = cnl_calc_voltage_level(cdclk_state.cdclk); + cdclk_state.voltage_level = + dev_priv->display.calc_voltage_level(cdclk_state.cdclk); bxt_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); } @@ -2246,7 +2224,7 @@ static int bxt_modeset_calc_cdclk(struct intel_atomic_state *state) state->cdclk.logical.vco = vco; state->cdclk.logical.cdclk = cdclk; state->cdclk.logical.voltage_level = - bxt_calc_voltage_level(cdclk); + dev_priv->display.calc_voltage_level(cdclk); if (!state->active_pipes) { cdclk = bxt_calc_cdclk(dev_priv, state->cdclk.force_min_cdclk); @@ -2255,7 +2233,7 @@ static int bxt_modeset_calc_cdclk(struct intel_atomic_state *state) state->cdclk.actual.vco = vco; state->cdclk.actual.cdclk = cdclk; state->cdclk.actual.voltage_level = - bxt_calc_voltage_level(cdclk); + dev_priv->display.calc_voltage_level(cdclk); } else { state->cdclk.actual = state->cdclk.logical; } @@ -2310,14 +2288,9 @@ static int icl_modeset_calc_cdclk(struct intel_atomic_state *state) state->cdclk.logical.vco = vco; state->cdclk.logical.cdclk = cdclk; - if (IS_ELKHARTLAKE(dev_priv)) - state->cdclk.logical.voltage_level = - max(ehl_calc_voltage_level(cdclk), - cnl_compute_min_voltage_level(state)); - else - state->cdclk.logical.voltage_level = - max(icl_calc_voltage_level(cdclk), - cnl_compute_min_voltage_level(state)); + state->cdclk.logical.voltage_level = + max(dev_priv->display.calc_voltage_level(cdclk), + cnl_compute_min_voltage_level(state)); if (!state->active_pipes) { cdclk = bxt_calc_cdclk(dev_priv, state->cdclk.force_min_cdclk); @@ -2325,12 +2298,8 @@ static int icl_modeset_calc_cdclk(struct intel_atomic_state *state) state->cdclk.actual.vco = vco; state->cdclk.actual.cdclk = cdclk; - if (IS_ELKHARTLAKE(dev_priv)) - state->cdclk.actual.voltage_level = - ehl_calc_voltage_level(cdclk); - else - state->cdclk.actual.voltage_level = - icl_calc_voltage_level(cdclk); + state->cdclk.actual.voltage_level = + dev_priv->display.calc_voltage_level(cdclk); } else { state->cdclk.actual = state->cdclk.logical; } @@ -2554,17 +2523,25 @@ void intel_update_rawclk(struct drm_i915_private *dev_priv) */ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) { - if (INTEL_GEN(dev_priv) >= 11) { + if (IS_ELKHARTLAKE(dev_priv)) { + dev_priv->display.set_cdclk = bxt_set_cdclk; + dev_priv->display.modeset_calc_cdclk = icl_modeset_calc_cdclk; + dev_priv->display.calc_voltage_level = ehl_calc_voltage_level; + dev_priv->cdclk.table = icl_cdclk_table; + } else if (INTEL_GEN(dev_priv) >= 11) { dev_priv->display.set_cdclk = bxt_set_cdclk; dev_priv->display.modeset_calc_cdclk = icl_modeset_calc_cdclk; + dev_priv->display.calc_voltage_level = icl_calc_voltage_level; dev_priv->cdclk.table = icl_cdclk_table; } else if (IS_CANNONLAKE(dev_priv)) { dev_priv->display.set_cdclk = bxt_set_cdclk; dev_priv->display.modeset_calc_cdclk = cnl_modeset_calc_cdclk; + dev_priv->display.calc_voltage_level = cnl_calc_voltage_level; dev_priv->cdclk.table = cnl_cdclk_table; } else if (IS_GEN9_LP(dev_priv)) { dev_priv->display.set_cdclk = bxt_set_cdclk; dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk; + dev_priv->display.calc_voltage_level = bxt_calc_voltage_level; dev_priv->cdclk.table = bxt_cdclk_table; } else if (IS_GEN9_BC(dev_priv)) { dev_priv->display.set_cdclk = skl_set_cdclk; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ff6aff2a48663..dc4d40b44d74d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -276,6 +276,7 @@ struct drm_i915_display_funcs { int (*compute_global_watermarks)(struct intel_atomic_state *state); void (*update_wm)(struct intel_crtc *crtc); int (*modeset_calc_cdclk)(struct intel_atomic_state *state); + u8 (*calc_voltage_level)(int cdclk); /* Returns the active state of the crtc, and if the crtc is active, * fills out the pipe-config with the hw state. */ bool (*get_pipe_config)(struct intel_crtc *, From 8f9f717d6c44649353e153cfcd32ad99618956a9 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 10 Sep 2019 08:42:51 -0700 Subject: [PATCH 136/331] drm/i915: Enhance cdclk sanitization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When reading out the BIOS-programmed cdclk state, let's make sure that the cdclk value is on the valid list for the platform, ensure that the VCO matches the cdclk, and ensure that the CD2X divider was set properly. Cc: Ville Syrjälä Signed-off-by: Matt Roper Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190910154252.30503-8-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/display/intel_cdclk.c | 34 ++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index e1a4ac9bd4f0d..988d0849ce091 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -1598,6 +1598,7 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, static void bxt_sanitize_cdclk(struct drm_i915_private *dev_priv) { u32 cdctl, expected; + int cdclk, vco; intel_update_cdclk(dev_priv); intel_dump_cdclk_state(&dev_priv->cdclk.hw, "Current CDCLK"); @@ -1620,8 +1621,37 @@ static void bxt_sanitize_cdclk(struct drm_i915_private *dev_priv) */ cdctl &= ~BXT_CDCLK_CD2X_PIPE_NONE; - expected = (cdctl & BXT_CDCLK_CD2X_DIV_SEL_MASK) | - skl_cdclk_decimal(dev_priv->cdclk.hw.cdclk); + /* Make sure this is a legal cdclk value for the platform */ + cdclk = bxt_calc_cdclk(dev_priv, dev_priv->cdclk.hw.cdclk); + if (cdclk != dev_priv->cdclk.hw.cdclk) + goto sanitize; + + /* Make sure the VCO is correct for the cdclk */ + vco = bxt_calc_cdclk_pll_vco(dev_priv, cdclk); + if (vco != dev_priv->cdclk.hw.vco) + goto sanitize; + + expected = skl_cdclk_decimal(cdclk); + + /* Figure out what CD2X divider we should be using for this cdclk */ + switch (DIV_ROUND_CLOSEST(dev_priv->cdclk.hw.vco, + dev_priv->cdclk.hw.cdclk)) { + case 2: + expected |= BXT_CDCLK_CD2X_DIV_SEL_1; + break; + case 3: + expected |= BXT_CDCLK_CD2X_DIV_SEL_1_5; + break; + case 4: + expected |= BXT_CDCLK_CD2X_DIV_SEL_2; + break; + case 8: + expected |= BXT_CDCLK_CD2X_DIV_SEL_4; + break; + default: + goto sanitize; + } + /* * Disable SSA Precharge when CD clock frequency < 500 MHz, * enable otherwise. From 0c1279b58fc7de230d47bb0dc21b7a08417dee96 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 10 Sep 2019 08:42:52 -0700 Subject: [PATCH 137/331] drm/i915: Consolidate {bxt,cnl,icl}_init_cdclk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The BXT and CNL functions were already basically identical, whereas ICL's function tried to do its own sanitization rather than calling bxt_sanitize_cdclk. This should actually fix a bug in our ICL initialization where it would consider the /2 CD2X divider invalid and force an unnecessary sanitization (we now have valid clock frequencies that use this divider). Cc: Ville Syrjälä Signed-off-by: Matt Roper Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190910154252.30503-9-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/display/intel_cdclk.c | 65 +--------------------- 1 file changed, 2 insertions(+), 63 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 988d0849ce091..618a93bad0a8d 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -1710,63 +1710,6 @@ static void bxt_uninit_cdclk(struct drm_i915_private *dev_priv) bxt_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); } -static void icl_init_cdclk(struct drm_i915_private *dev_priv) -{ - struct intel_cdclk_state sanitized_state; - u32 val; - - /* This sets dev_priv->cdclk.hw. */ - intel_update_cdclk(dev_priv); - intel_dump_cdclk_state(&dev_priv->cdclk.hw, "Current CDCLK"); - - /* This means CDCLK disabled. */ - if (dev_priv->cdclk.hw.cdclk == dev_priv->cdclk.hw.bypass) - goto sanitize; - - val = I915_READ(CDCLK_CTL); - - if ((val & BXT_CDCLK_CD2X_DIV_SEL_MASK) != 0) - goto sanitize; - - if ((val & CDCLK_FREQ_DECIMAL_MASK) != - skl_cdclk_decimal(dev_priv->cdclk.hw.cdclk)) - goto sanitize; - - return; - -sanitize: - DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n"); - - sanitized_state.ref = dev_priv->cdclk.hw.ref; - sanitized_state.cdclk = bxt_calc_cdclk(dev_priv, 0); - sanitized_state.vco = bxt_calc_cdclk_pll_vco(dev_priv, - sanitized_state.cdclk); - sanitized_state.voltage_level = - dev_priv->display.calc_voltage_level(sanitized_state.cdclk); - - bxt_set_cdclk(dev_priv, &sanitized_state, INVALID_PIPE); -} - -static void cnl_init_cdclk(struct drm_i915_private *dev_priv) -{ - struct intel_cdclk_state cdclk_state; - - bxt_sanitize_cdclk(dev_priv); - - if (dev_priv->cdclk.hw.cdclk != 0 && - dev_priv->cdclk.hw.vco != 0) - return; - - cdclk_state = dev_priv->cdclk.hw; - - cdclk_state.cdclk = bxt_calc_cdclk(dev_priv, 0); - cdclk_state.vco = bxt_calc_cdclk_pll_vco(dev_priv, cdclk_state.cdclk); - cdclk_state.voltage_level = - dev_priv->display.calc_voltage_level(cdclk_state.cdclk); - - bxt_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); -} - /** * intel_cdclk_init - Initialize CDCLK * @i915: i915 device @@ -1778,14 +1721,10 @@ static void cnl_init_cdclk(struct drm_i915_private *dev_priv) */ void intel_cdclk_init(struct drm_i915_private *i915) { - if (INTEL_GEN(i915) >= 11) - icl_init_cdclk(i915); - else if (IS_CANNONLAKE(i915)) - cnl_init_cdclk(i915); + if (IS_GEN9_LP(i915) || INTEL_GEN(i915) >= 10) + bxt_init_cdclk(i915); else if (IS_GEN9_BC(i915)) skl_init_cdclk(i915); - else if (IS_GEN9_LP(i915)) - bxt_init_cdclk(i915); } /** From 61fa60ff6e6a0e16e9bcc15f632ce5437afa6494 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 10 Sep 2019 15:38:20 +0100 Subject: [PATCH 138/331] drm/i915: Move GT init to intel_gt.c Code in i915_gem_init_hw is all about GT init so move it to intel_gt.c renaming to intel_gt_init_hw. Existing intel_gt_init_hw is renamed to intel_gt_init_hw_early since it is currently called from driver probe. Signed-off-by: Tvrtko Ursulin Cc: Andi Shyti Cc: Chris Wilson Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190910143823.10686-2-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_pm.c | 2 +- drivers/gpu/drm/i915/gt/intel_gt.c | 92 ++++++++++++++++++++++- drivers/gpu/drm/i915/gt/intel_gt.h | 3 +- drivers/gpu/drm/i915/gt/intel_reset.c | 2 +- drivers/gpu/drm/i915/i915_drv.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 1 - drivers/gpu/drm/i915/i915_gem.c | 91 +--------------------- drivers/gpu/drm/i915/selftests/mock_gtt.c | 2 +- 8 files changed, 98 insertions(+), 97 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 9b1129aaacfe5..3bd764104d41c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -242,7 +242,7 @@ void i915_gem_resume(struct drm_i915_private *i915) mutex_lock(&i915->drm.struct_mutex); intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); - if (i915_gem_init_hw(i915)) + if (intel_gt_init_hw(&i915->gt)) goto err_wedged; /* diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index e2cc697d27fbc..eef9bdae8ebb4 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -6,6 +6,7 @@ #include "i915_drv.h" #include "intel_gt.h" #include "intel_gt_pm.h" +#include "intel_mocs.h" #include "intel_uncore.h" #include "intel_pm.h" @@ -25,7 +26,7 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) intel_uc_init_early(>->uc); } -void intel_gt_init_hw(struct drm_i915_private *i915) +void intel_gt_init_hw_early(struct drm_i915_private *i915) { i915->gt.ggtt = &i915->ggtt; @@ -33,6 +34,95 @@ void intel_gt_init_hw(struct drm_i915_private *i915) intel_gt_pm_disable(&i915->gt); } +static void init_unused_ring(struct intel_gt *gt, u32 base) +{ + struct intel_uncore *uncore = gt->uncore; + + intel_uncore_write(uncore, RING_CTL(base), 0); + intel_uncore_write(uncore, RING_HEAD(base), 0); + intel_uncore_write(uncore, RING_TAIL(base), 0); + intel_uncore_write(uncore, RING_START(base), 0); +} + +static void init_unused_rings(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + + if (IS_I830(i915)) { + init_unused_ring(gt, PRB1_BASE); + init_unused_ring(gt, SRB0_BASE); + init_unused_ring(gt, SRB1_BASE); + init_unused_ring(gt, SRB2_BASE); + init_unused_ring(gt, SRB3_BASE); + } else if (IS_GEN(i915, 2)) { + init_unused_ring(gt, SRB0_BASE); + init_unused_ring(gt, SRB1_BASE); + } else if (IS_GEN(i915, 3)) { + init_unused_ring(gt, PRB1_BASE); + init_unused_ring(gt, PRB2_BASE); + } +} + +int intel_gt_init_hw(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + int ret; + + BUG_ON(!i915->kernel_context); + ret = intel_gt_terminally_wedged(gt); + if (ret) + return ret; + + gt->last_init_time = ktime_get(); + + /* Double layer security blanket, see i915_gem_init() */ + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + + if (HAS_EDRAM(i915) && INTEL_GEN(i915) < 9) + intel_uncore_rmw(uncore, HSW_IDICR, 0, IDIHASHMSK(0xf)); + + if (IS_HASWELL(i915)) + intel_uncore_write(uncore, + MI_PREDICATE_RESULT_2, + IS_HSW_GT3(i915) ? + LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); + + /* Apply the GT workarounds... */ + intel_gt_apply_workarounds(gt); + /* ...and determine whether they are sticking. */ + intel_gt_verify_workarounds(gt, "init"); + + intel_gt_init_swizzling(gt); + + /* + * At least 830 can leave some of the unused rings + * "active" (ie. head != tail) after resume which + * will prevent c3 entry. Makes sure all unused rings + * are totally idle. + */ + init_unused_rings(gt); + + ret = i915_ppgtt_init_hw(gt); + if (ret) { + DRM_ERROR("Enabling PPGTT failed (%d)\n", ret); + goto out; + } + + /* We can't enable contexts until all firmware is loaded */ + ret = intel_uc_init_hw(>->uc); + if (ret) { + i915_probe_error(i915, "Enabling uc failed (%d)\n", ret); + goto out; + } + + intel_mocs_init(gt); + +out: + intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); + return ret; +} + static void rmw_set(struct intel_uncore *uncore, i915_reg_t reg, u32 set) { intel_uncore_rmw(uncore, reg, 0, set); diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 17af21cb7ed35..e6ab0bff0efb5 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -28,7 +28,8 @@ static inline struct intel_gt *huc_to_gt(struct intel_huc *huc) } void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915); -void intel_gt_init_hw(struct drm_i915_private *i915); +void intel_gt_init_hw_early(struct drm_i915_private *i915); +int __must_check intel_gt_init_hw(struct intel_gt *gt); int intel_gt_init(struct intel_gt *gt); void intel_gt_driver_register(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index b9d84d52e9864..296bbc7745fb0 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -972,7 +972,7 @@ void intel_gt_reset(struct intel_gt *gt, * was running at the time of the reset (i.e. we weren't VT * switched away). */ - ret = i915_gem_init_hw(gt->i915); + ret = intel_gt_init_hw(gt); if (ret) { DRM_ERROR("Failed to initialise HW following reset (%d)\n", ret); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 0aadff9b8c88e..8966672b0294c 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1269,7 +1269,7 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) if (ret) goto err_ggtt; - intel_gt_init_hw(dev_priv); + intel_gt_init_hw_early(dev_priv); ret = i915_ggtt_enable_hw(dev_priv); if (ret) { diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index dc4d40b44d74d..51d575cca2ac3 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2322,7 +2322,6 @@ static inline u32 i915_reset_engine_count(struct i915_gpu_error *error, void i915_gem_init_mmio(struct drm_i915_private *i915); int __must_check i915_gem_init(struct drm_i915_private *dev_priv); -int __must_check i915_gem_init_hw(struct drm_i915_private *dev_priv); void i915_gem_driver_register(struct drm_i915_private *i915); void i915_gem_driver_unregister(struct drm_i915_private *i915); void i915_gem_driver_remove(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 141024c66f36b..db2681514a0b9 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1148,95 +1148,6 @@ void i915_gem_sanitize(struct drm_i915_private *i915) intel_runtime_pm_put(&i915->runtime_pm, wakeref); } -static void init_unused_ring(struct intel_gt *gt, u32 base) -{ - struct intel_uncore *uncore = gt->uncore; - - intel_uncore_write(uncore, RING_CTL(base), 0); - intel_uncore_write(uncore, RING_HEAD(base), 0); - intel_uncore_write(uncore, RING_TAIL(base), 0); - intel_uncore_write(uncore, RING_START(base), 0); -} - -static void init_unused_rings(struct intel_gt *gt) -{ - struct drm_i915_private *i915 = gt->i915; - - if (IS_I830(i915)) { - init_unused_ring(gt, PRB1_BASE); - init_unused_ring(gt, SRB0_BASE); - init_unused_ring(gt, SRB1_BASE); - init_unused_ring(gt, SRB2_BASE); - init_unused_ring(gt, SRB3_BASE); - } else if (IS_GEN(i915, 2)) { - init_unused_ring(gt, SRB0_BASE); - init_unused_ring(gt, SRB1_BASE); - } else if (IS_GEN(i915, 3)) { - init_unused_ring(gt, PRB1_BASE); - init_unused_ring(gt, PRB2_BASE); - } -} - -int i915_gem_init_hw(struct drm_i915_private *i915) -{ - struct intel_uncore *uncore = &i915->uncore; - struct intel_gt *gt = &i915->gt; - int ret; - - BUG_ON(!i915->kernel_context); - ret = intel_gt_terminally_wedged(gt); - if (ret) - return ret; - - gt->last_init_time = ktime_get(); - - /* Double layer security blanket, see i915_gem_init() */ - intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); - - if (HAS_EDRAM(i915) && INTEL_GEN(i915) < 9) - intel_uncore_rmw(uncore, HSW_IDICR, 0, IDIHASHMSK(0xf)); - - if (IS_HASWELL(i915)) - intel_uncore_write(uncore, - MI_PREDICATE_RESULT_2, - IS_HSW_GT3(i915) ? - LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); - - /* Apply the GT workarounds... */ - intel_gt_apply_workarounds(gt); - /* ...and determine whether they are sticking. */ - intel_gt_verify_workarounds(gt, "init"); - - intel_gt_init_swizzling(gt); - - /* - * At least 830 can leave some of the unused rings - * "active" (ie. head != tail) after resume which - * will prevent c3 entry. Makes sure all unused rings - * are totally idle. - */ - init_unused_rings(gt); - - ret = i915_ppgtt_init_hw(gt); - if (ret) { - DRM_ERROR("Enabling PPGTT failed (%d)\n", ret); - goto out; - } - - /* We can't enable contexts until all firmware is loaded */ - ret = intel_uc_init_hw(>->uc); - if (ret) { - i915_probe_error(i915, "Enabling uc failed (%d)\n", ret); - goto out; - } - - intel_mocs_init(gt); - -out: - intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); - return ret; -} - static int __intel_engines_record_defaults(struct drm_i915_private *i915) { struct i915_request *requests[I915_NUM_ENGINES] = {}; @@ -1449,7 +1360,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) intel_uc_init(&dev_priv->gt.uc); - ret = i915_gem_init_hw(dev_priv); + ret = intel_gt_init_hw(&dev_priv->gt); if (ret) goto err_uc_init; diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c index e62a67e0f79c0..7d5fb60b43bb3 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c @@ -118,7 +118,7 @@ void mock_init_ggtt(struct drm_i915_private *i915, struct i915_ggtt *ggtt) i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT); - intel_gt_init_hw(i915); + intel_gt_init_hw_early(i915); } void mock_fini_ggtt(struct i915_ggtt *ggtt) From dab3588a151e5edb0cb0d56476265d2e5b3b1c79 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 10 Sep 2019 15:38:21 +0100 Subject: [PATCH 139/331] drm/i915: Make wait_for_timelines take struct intel_gt Timelines live in struct intel_gt so make wait_for_timelines take exactly what it needs. Signed-off-by: Tvrtko Ursulin Cc: Andi Shyti Cc: Chris Wilson Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190910143823.10686-3-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_gem.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index db2681514a0b9..2da9544fa9a4e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -888,10 +888,9 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915) } static long -wait_for_timelines(struct drm_i915_private *i915, - unsigned int wait, long timeout) +wait_for_timelines(struct intel_gt *gt, unsigned int wait, long timeout) { - struct intel_gt_timelines *timelines = &i915->gt.timelines; + struct intel_gt_timelines *timelines = >->timelines; struct intel_timeline *tl; unsigned long flags; @@ -934,15 +933,17 @@ wait_for_timelines(struct drm_i915_private *i915, int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags, long timeout) { + struct intel_gt *gt = &i915->gt; + /* If the device is asleep, we have no requests outstanding */ - if (!intel_gt_pm_is_awake(&i915->gt)) + if (!intel_gt_pm_is_awake(gt)) return 0; GEM_TRACE("flags=%x (%s), timeout=%ld%s\n", flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked", timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : ""); - timeout = wait_for_timelines(i915, flags, timeout); + timeout = wait_for_timelines(gt, flags, timeout); if (timeout < 0) return timeout; From ee236af8d51439ce2d4396ecaf84062b34b15f85 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 10 Sep 2019 15:38:22 +0100 Subject: [PATCH 140/331] drm/i915: Avoid round-trip via i915 in intel_gt_park Both in the container_of and getting to gt->awake there is no need to go via i915 since both the wakeref and awake are members of gt. Signed-off-by: Tvrtko Ursulin Cc: Andi Shyti Cc: Chris Wilson Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190910143823.10686-4-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 6ba0d2069f875..a2e29bcc9671a 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -56,9 +56,9 @@ static int __gt_unpark(struct intel_wakeref *wf) static int __gt_park(struct intel_wakeref *wf) { - struct drm_i915_private *i915 = - container_of(wf, typeof(*i915), gt.wakeref); - intel_wakeref_t wakeref = fetch_and_zero(&i915->gt.awake); + struct intel_gt *gt = container_of(wf, typeof(*gt), wakeref); + intel_wakeref_t wakeref = fetch_and_zero(>->awake); + struct drm_i915_private *i915 = gt->i915; GEM_TRACE("\n"); From 85dd14c2918d452fa875a4175bffb76e01c5af6e Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 10 Sep 2019 15:38:23 +0100 Subject: [PATCH 141/331] drm/i915: Make pm_notify take intel_gt These notifications operate on intel_gt so make the code take what it needs. Signed-off-by: Tvrtko Ursulin Cc: Andi Shyti Cc: Chris Wilson Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190910143823.10686-5-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index a2e29bcc9671a..2ccf8cacaa0a7 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -13,9 +13,9 @@ #include "intel_pm.h" #include "intel_wakeref.h" -static void pm_notify(struct drm_i915_private *i915, int state) +static void pm_notify(struct intel_gt *gt, int state) { - blocking_notifier_call_chain(&i915->gt.pm_notifications, state, i915); + blocking_notifier_call_chain(>->pm_notifications, state, gt->i915); } static int __gt_unpark(struct intel_wakeref *wf) @@ -49,7 +49,7 @@ static int __gt_unpark(struct intel_wakeref *wf) intel_gt_queue_hangcheck(gt); - pm_notify(i915, INTEL_GT_UNPARK); + pm_notify(gt, INTEL_GT_UNPARK); return 0; } @@ -62,7 +62,7 @@ static int __gt_park(struct intel_wakeref *wf) GEM_TRACE("\n"); - pm_notify(i915, INTEL_GT_PARK); + pm_notify(gt, INTEL_GT_PARK); i915_pmu_gt_parked(i915); if (INTEL_GEN(i915) >= 6) From 99013b10100c4b552eec845ee2ca5604c8332e92 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 10 Sep 2019 22:22:04 +0100 Subject: [PATCH 142/331] drm/i915: Make shrink/unshrink be atomic Add an atomic counter and always take the spinlock around the pin/unpin events, so that we can perform the list manipulation concurrently. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20190910212204.17190-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_domain.c | 3 +- .../gpu/drm/i915/gem/i915_gem_object_types.h | 1 + drivers/gpu/drm/i915/gem/i915_gem_pages.c | 1 + drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 36 +++++++++++-------- drivers/gpu/drm/i915/gt/intel_context.c | 2 +- 5 files changed, 26 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index da3e7cf12aa17..55c3ab59e3aa4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -494,7 +494,8 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) spin_lock_irqsave(&i915->mm.obj_lock, flags); - if (obj->mm.madv == I915_MADV_WILLNEED) + if (obj->mm.madv == I915_MADV_WILLNEED && + !atomic_read(&obj->mm.shrink_pin)) list_move_tail(&obj->mm.link, &i915->mm.shrink_list); spin_unlock_irqrestore(&i915->mm.obj_lock, flags); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index a558edf15ec80..d695f187b7905 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -156,6 +156,7 @@ struct drm_i915_gem_object { struct { struct mutex lock; /* protects the pages and their use */ atomic_t pages_pin_count; + atomic_t shrink_pin; struct sg_table *pages; void *mapping; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 18f0ce0135c17..2e941f093a20f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -71,6 +71,7 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, list = &i915->mm.shrink_list; list_add_tail(&obj->mm.link, list); + atomic_set(&obj->mm.shrink_pin, 0); spin_unlock_irqrestore(&i915->mm.obj_lock, flags); } } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index 4e55cfc2b0dc6..d2c05d7529098 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -516,46 +516,52 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj) { + struct drm_i915_private *i915 = obj_to_i915(obj); + unsigned long flags; + /* * We can only be called while the pages are pinned or when * the pages are released. If pinned, we should only be called * from a single caller under controlled conditions; and on release * only one caller may release us. Neither the two may cross. */ - if (!list_empty(&obj->mm.link)) { /* pinned by caller */ - struct drm_i915_private *i915 = obj_to_i915(obj); - unsigned long flags; - - spin_lock_irqsave(&i915->mm.obj_lock, flags); - GEM_BUG_ON(list_empty(&obj->mm.link)); + if (atomic_add_unless(&obj->mm.shrink_pin, 1, 0)) + return; + spin_lock_irqsave(&i915->mm.obj_lock, flags); + if (!atomic_fetch_inc(&obj->mm.shrink_pin) && + !list_empty(&obj->mm.link)) { list_del_init(&obj->mm.link); i915->mm.shrink_count--; i915->mm.shrink_memory -= obj->base.size; - - spin_unlock_irqrestore(&i915->mm.obj_lock, flags); } + spin_unlock_irqrestore(&i915->mm.obj_lock, flags); } static void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj, struct list_head *head) { + struct drm_i915_private *i915 = obj_to_i915(obj); + unsigned long flags; + GEM_BUG_ON(!i915_gem_object_has_pages(obj)); - GEM_BUG_ON(!list_empty(&obj->mm.link)); + if (!i915_gem_object_is_shrinkable(obj)) + return; - if (i915_gem_object_is_shrinkable(obj)) { - struct drm_i915_private *i915 = obj_to_i915(obj); - unsigned long flags; + if (atomic_add_unless(&obj->mm.shrink_pin, -1, 1)) + return; - spin_lock_irqsave(&i915->mm.obj_lock, flags); - GEM_BUG_ON(!kref_read(&obj->base.refcount)); + spin_lock_irqsave(&i915->mm.obj_lock, flags); + GEM_BUG_ON(!kref_read(&obj->base.refcount)); + if (atomic_dec_and_test(&obj->mm.shrink_pin)) { + GEM_BUG_ON(!list_empty(&obj->mm.link)); list_add_tail(&obj->mm.link, head); i915->mm.shrink_count++; i915->mm.shrink_memory += obj->base.size; - spin_unlock_irqrestore(&i915->mm.obj_lock, flags); } + spin_unlock_irqrestore(&i915->mm.obj_lock, flags); } void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index f55691d151aec..c0495811f4932 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -134,8 +134,8 @@ static int __context_pin_state(struct i915_vma *vma) static void __context_unpin_state(struct i915_vma *vma) { - __i915_vma_unpin(vma); i915_vma_make_shrinkable(vma); + __i915_vma_unpin(vma); } static void __intel_context_retire(struct i915_active *active) From 0606259e3b3a1220a0f04a92a1654a3f674f47ee Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 10 Sep 2019 18:48:01 -0700 Subject: [PATCH 143/331] drm/i915: Whitelist COMMON_SLICE_CHICKEN2 This allows userspace to use "legacy" mode for push constants, where they are committed at 3DPRIMITIVE or flush time, rather than being committed at 3DSTATE_BINDING_TABLE_POINTERS_XS time. Gen6-8 and Gen11 both use the "legacy" behavior - only Gen9 works in the "new" way. Conflating push constants with binding tables is painful for userspace, we would like to be able to avoid doing so. Signed-off-by: Kenneth Graunke Cc: stable@vger.kernel.org Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190911014801.26821-1-kenneth@whitecape.org --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 243d3f77be13f..41d0f786e06d7 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1062,6 +1062,9 @@ static void gen9_whitelist_build(struct i915_wa_list *w) /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */ whitelist_reg(w, GEN8_HDC_CHICKEN1); + + /* WaSendPushConstantsFromMMIO:skl,bxt */ + whitelist_reg(w, COMMON_SLICE_CHICKEN2); } static void skl_whitelist_build(struct intel_engine_cs *engine) From 4dd2fbbfb532d0981b0ecd218c0597ac0047ca55 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 11 Sep 2019 10:02:43 +0100 Subject: [PATCH 144/331] drm/i915: Make i915_vma.flags atomic_t for mutex reduction In preparation for reducing struct_mutex stranglehold around the vm, make the vma.flags atomic so that we can acquire a pin on the vma atomically before deciding if we need to take the mutex. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20190911090243.16786-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_object.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 2 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 14 ++--- drivers/gpu/drm/i915/i915_vma.c | 29 +++++----- drivers/gpu/drm/i915/i915_vma.h | 62 +++++++++++++--------- drivers/gpu/drm/i915/selftests/mock_gtt.c | 4 +- 6 files changed, 64 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index d7855dc5a5c57..0ef60dae23a7e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -163,7 +163,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) { GEM_BUG_ON(i915_vma_is_active(vma)); - vma->flags &= ~I915_VMA_PIN_MASK; + atomic_and(~I915_VMA_PIN_MASK, &vma->flags); i915_vma_destroy(vma); } GEM_BUG_ON(!list_empty(&obj->vma.list)); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index 7ce5259d73d62..bfbc3e3daf924 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -688,7 +688,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); vma->pages = obj->mm.pages; - vma->flags |= I915_VMA_GLOBAL_BIND; + set_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma)); __i915_vma_set_map_and_fenceable(vma); mutex_lock(&ggtt->vm.mutex); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 48688d683e951..e4f66bfe74c29 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -155,7 +155,7 @@ static int ppgtt_bind_vma(struct i915_vma *vma, u32 pte_flags; int err; - if (!(vma->flags & I915_VMA_LOCAL_BIND)) { + if (!i915_vma_is_bound(vma, I915_VMA_LOCAL_BIND)) { err = vma->vm->allocate_va_range(vma->vm, vma->node.start, vma->size); if (err) @@ -1877,7 +1877,7 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size) vma->size = size; vma->fence_size = size; - vma->flags = I915_VMA_GGTT; + atomic_set(&vma->flags, I915_VMA_GGTT); vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */ INIT_LIST_HEAD(&vma->obj_link); @@ -2440,7 +2440,7 @@ static int ggtt_bind_vma(struct i915_vma *vma, * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally * upgrade to both bound if we bind either to avoid double-binding. */ - vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; + atomic_or(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND, &vma->flags); return 0; } @@ -2470,7 +2470,7 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, if (flags & I915_VMA_LOCAL_BIND) { struct i915_ppgtt *alias = i915_vm_to_ggtt(vma->vm)->alias; - if (!(vma->flags & I915_VMA_LOCAL_BIND)) { + if (!i915_vma_is_bound(vma, I915_VMA_LOCAL_BIND)) { ret = alias->vm.allocate_va_range(&alias->vm, vma->node.start, vma->size); @@ -2498,7 +2498,7 @@ static void aliasing_gtt_unbind_vma(struct i915_vma *vma) { struct drm_i915_private *i915 = vma->vm->i915; - if (vma->flags & I915_VMA_GLOBAL_BIND) { + if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) { struct i915_address_space *vm = vma->vm; intel_wakeref_t wakeref; @@ -2506,7 +2506,7 @@ static void aliasing_gtt_unbind_vma(struct i915_vma *vma) vm->clear_range(vm, vma->node.start, vma->size); } - if (vma->flags & I915_VMA_LOCAL_BIND) { + if (i915_vma_is_bound(vma, I915_VMA_LOCAL_BIND)) { struct i915_address_space *vm = &i915_vm_to_ggtt(vma->vm)->alias->vm; @@ -3308,7 +3308,7 @@ static void ggtt_restore_mappings(struct i915_ggtt *ggtt) list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) { struct drm_i915_gem_object *obj = vma->obj; - if (!(vma->flags & I915_VMA_GLOBAL_BIND)) + if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) continue; mutex_unlock(&ggtt->vm.mutex); diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 68d9f9b4d050e..411047d6a909f 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -171,7 +171,7 @@ vma_create(struct drm_i915_gem_object *obj, i915_gem_object_get_stride(obj)); GEM_BUG_ON(!is_power_of_2(vma->fence_alignment)); - vma->flags |= I915_VMA_GGTT; + __set_bit(I915_VMA_GGTT_BIT, __i915_vma_flags(vma)); } spin_lock(&obj->vma.lock); @@ -325,7 +325,8 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, if (flags & PIN_USER) bind_flags |= I915_VMA_LOCAL_BIND; - vma_flags = vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); + vma_flags = atomic_read(&vma->flags); + vma_flags &= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; if (flags & PIN_UPDATE) bind_flags |= vma_flags; else @@ -340,7 +341,7 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, if (ret) return ret; - vma->flags |= bind_flags; + atomic_or(bind_flags, &vma->flags); return 0; } @@ -359,7 +360,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) } GEM_BUG_ON(!i915_vma_is_ggtt(vma)); - GEM_BUG_ON((vma->flags & I915_VMA_GLOBAL_BIND) == 0); + GEM_BUG_ON(!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)); ptr = vma->iomap; if (ptr == NULL) { @@ -472,9 +473,9 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) mappable = vma->node.start + vma->fence_size <= i915_vm_to_ggtt(vma->vm)->mappable_end; if (mappable && fenceable) - vma->flags |= I915_VMA_CAN_FENCE; + set_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma)); else - vma->flags &= ~I915_VMA_CAN_FENCE; + clear_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma)); } bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color) @@ -544,7 +545,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) int ret; GEM_BUG_ON(i915_vma_is_closed(vma)); - GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); + GEM_BUG_ON(i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); size = max(size, vma->size); @@ -678,7 +679,7 @@ static void i915_vma_remove(struct i915_vma *vma) { GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); - GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); + GEM_BUG_ON(i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); vma->ops->clear_pages(vma); @@ -709,7 +710,7 @@ i915_vma_remove(struct i915_vma *vma) int __i915_vma_do_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) { - const unsigned int bound = vma->flags; + const unsigned int bound = atomic_read(&vma->flags); int ret; lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); @@ -732,9 +733,9 @@ int __i915_vma_do_pin(struct i915_vma *vma, if (ret) goto err_remove; - GEM_BUG_ON((vma->flags & I915_VMA_BIND_MASK) == 0); + GEM_BUG_ON(!i915_vma_is_bound(vma, I915_VMA_BIND_MASK)); - if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND) + if ((bound ^ atomic_read(&vma->flags)) & I915_VMA_GLOBAL_BIND) __i915_vma_set_map_and_fenceable(vma); GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); @@ -744,7 +745,7 @@ int __i915_vma_do_pin(struct i915_vma *vma, if ((bound & I915_VMA_BIND_MASK) == 0) { i915_vma_remove(vma); GEM_BUG_ON(vma->pages); - GEM_BUG_ON(vma->flags & I915_VMA_BIND_MASK); + GEM_BUG_ON(atomic_read(&vma->flags) & I915_VMA_BIND_MASK); } err_unpin: __i915_vma_unpin(vma); @@ -991,7 +992,7 @@ int i915_vma_unbind(struct i915_vma *vma) mutex_unlock(&vma->vm->mutex); __i915_vma_iounmap(vma); - vma->flags &= ~I915_VMA_CAN_FENCE; + clear_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma)); } GEM_BUG_ON(vma->fence); GEM_BUG_ON(i915_vma_has_userfault(vma)); @@ -1000,7 +1001,7 @@ int i915_vma_unbind(struct i915_vma *vma) trace_i915_vma_unbind(vma); vma->ops->unbind_vma(vma); } - vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); + atomic_and(~I915_VMA_BIND_MASK, &vma->flags); i915_vma_remove(vma); diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 425bda4db2c26..8bcb5812c4467 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -72,7 +72,7 @@ struct i915_vma { * that exist in the ctx->handle_vmas LUT for this vma. */ atomic_t open_count; - unsigned long flags; + atomic_t flags; /** * How many users have pinned this object in GTT space. * @@ -97,18 +97,29 @@ struct i915_vma { * users. */ #define I915_VMA_PIN_MASK 0xff -#define I915_VMA_PIN_OVERFLOW BIT(8) +#define I915_VMA_PIN_OVERFLOW_BIT 8 +#define I915_VMA_PIN_OVERFLOW ((int)BIT(I915_VMA_PIN_OVERFLOW_BIT)) /** Flags and address space this VMA is bound to */ -#define I915_VMA_GLOBAL_BIND BIT(9) -#define I915_VMA_LOCAL_BIND BIT(10) -#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW) +#define I915_VMA_GLOBAL_BIND_BIT 9 +#define I915_VMA_LOCAL_BIND_BIT 10 -#define I915_VMA_GGTT BIT(11) -#define I915_VMA_CAN_FENCE BIT(12) +#define I915_VMA_GLOBAL_BIND ((int)BIT(I915_VMA_GLOBAL_BIND_BIT)) +#define I915_VMA_LOCAL_BIND ((int)BIT(I915_VMA_LOCAL_BIND_BIT)) + +#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | \ + I915_VMA_LOCAL_BIND | \ + I915_VMA_PIN_OVERFLOW) + +#define I915_VMA_GGTT_BIT 11 +#define I915_VMA_CAN_FENCE_BIT 12 #define I915_VMA_USERFAULT_BIT 13 -#define I915_VMA_USERFAULT BIT(I915_VMA_USERFAULT_BIT) -#define I915_VMA_GGTT_WRITE BIT(14) +#define I915_VMA_GGTT_WRITE_BIT 14 + +#define I915_VMA_GGTT ((int)BIT(I915_VMA_GGTT_BIT)) +#define I915_VMA_CAN_FENCE ((int)BIT(I915_VMA_CAN_FENCE_BIT)) +#define I915_VMA_USERFAULT ((int)BIT(I915_VMA_USERFAULT_BIT)) +#define I915_VMA_GGTT_WRITE ((int)BIT(I915_VMA_GGTT_WRITE_BIT)) struct i915_active active; @@ -162,48 +173,51 @@ int __must_check i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq, unsigned int flags); +#define __i915_vma_flags(v) ((unsigned long *)&(v)->flags.counter) + static inline bool i915_vma_is_ggtt(const struct i915_vma *vma) { - return vma->flags & I915_VMA_GGTT; + return test_bit(I915_VMA_GGTT_BIT, __i915_vma_flags(vma)); } static inline bool i915_vma_has_ggtt_write(const struct i915_vma *vma) { - return vma->flags & I915_VMA_GGTT_WRITE; + return test_bit(I915_VMA_GGTT_WRITE_BIT, __i915_vma_flags(vma)); } static inline void i915_vma_set_ggtt_write(struct i915_vma *vma) { GEM_BUG_ON(!i915_vma_is_ggtt(vma)); - vma->flags |= I915_VMA_GGTT_WRITE; + set_bit(I915_VMA_GGTT_WRITE_BIT, __i915_vma_flags(vma)); } -static inline void i915_vma_unset_ggtt_write(struct i915_vma *vma) +static inline bool i915_vma_unset_ggtt_write(struct i915_vma *vma) { - vma->flags &= ~I915_VMA_GGTT_WRITE; + return test_and_clear_bit(I915_VMA_GGTT_WRITE_BIT, + __i915_vma_flags(vma)); } void i915_vma_flush_writes(struct i915_vma *vma); static inline bool i915_vma_is_map_and_fenceable(const struct i915_vma *vma) { - return vma->flags & I915_VMA_CAN_FENCE; + return test_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma)); } static inline bool i915_vma_set_userfault(struct i915_vma *vma) { GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma)); - return __test_and_set_bit(I915_VMA_USERFAULT_BIT, &vma->flags); + return test_and_set_bit(I915_VMA_USERFAULT_BIT, __i915_vma_flags(vma)); } static inline void i915_vma_unset_userfault(struct i915_vma *vma) { - return __clear_bit(I915_VMA_USERFAULT_BIT, &vma->flags); + return clear_bit(I915_VMA_USERFAULT_BIT, __i915_vma_flags(vma)); } static inline bool i915_vma_has_userfault(const struct i915_vma *vma) { - return test_bit(I915_VMA_USERFAULT_BIT, &vma->flags); + return test_bit(I915_VMA_USERFAULT_BIT, __i915_vma_flags(vma)); } static inline bool i915_vma_is_closed(const struct i915_vma *vma) @@ -330,7 +344,7 @@ i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) /* Pin early to prevent the shrinker/eviction logic from destroying * our vma as we insert and bind. */ - if (likely(((++vma->flags ^ flags) & I915_VMA_BIND_MASK) == 0)) { + if (likely(((atomic_inc_return(&vma->flags) ^ flags) & I915_VMA_BIND_MASK) == 0)) { GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); return 0; @@ -341,7 +355,7 @@ i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) static inline int i915_vma_pin_count(const struct i915_vma *vma) { - return vma->flags & I915_VMA_PIN_MASK; + return atomic_read(&vma->flags) & I915_VMA_PIN_MASK; } static inline bool i915_vma_is_pinned(const struct i915_vma *vma) @@ -351,13 +365,13 @@ static inline bool i915_vma_is_pinned(const struct i915_vma *vma) static inline void __i915_vma_pin(struct i915_vma *vma) { - vma->flags++; - GEM_BUG_ON(vma->flags & I915_VMA_PIN_OVERFLOW); + atomic_inc(&vma->flags); + GEM_BUG_ON(atomic_read(&vma->flags) & I915_VMA_PIN_OVERFLOW); } static inline void __i915_vma_unpin(struct i915_vma *vma) { - vma->flags--; + atomic_dec(&vma->flags); } static inline void i915_vma_unpin(struct i915_vma *vma) @@ -370,7 +384,7 @@ static inline void i915_vma_unpin(struct i915_vma *vma) static inline bool i915_vma_is_bound(const struct i915_vma *vma, unsigned int where) { - return vma->flags & where; + return atomic_read(&vma->flags) & where; } static inline bool i915_node_color_differs(const struct drm_mm_node *node, diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c index 7d5fb60b43bb3..173f2d4dbd14f 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c @@ -43,7 +43,7 @@ static int mock_bind_ppgtt(struct i915_vma *vma, u32 flags) { GEM_BUG_ON(flags & I915_VMA_GLOBAL_BIND); - vma->flags |= I915_VMA_LOCAL_BIND; + set_bit(I915_VMA_LOCAL_BIND_BIT, __i915_vma_flags(vma)); return 0; } @@ -86,7 +86,7 @@ static int mock_bind_ggtt(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags) { - vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; + atomic_or(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND, &vma->flags); return 0; } From 43ed22753cf18c4bf60de81d567beee908e5e9f3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 11 Sep 2019 08:47:27 +0100 Subject: [PATCH 145/331] drm/i915/display: Add glk_cdclk_table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 736da8112fee ("drm/i915: Use literal representation of cdclk tables") pushed the cdclk logic into tables, adding glk_cdclk_table but not using yet: drivers/gpu/drm/i915/display/intel_cdclk.c:1173:38: error: ‘glk_cdclk_table’ defined but not used [-Werror=unused-const-variable=] Fixes: 736da8112fee ("drm/i915: Use literal representation of cdclk tables") Signed-off-by: Chris Wilson Cc: Ville Syrjälä Cc: Matt Roper Cc: Jani Nikula Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190911074727.32585-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/display/intel_cdclk.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 618a93bad0a8d..13779b6029f5d 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2511,7 +2511,10 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) dev_priv->display.set_cdclk = bxt_set_cdclk; dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk; dev_priv->display.calc_voltage_level = bxt_calc_voltage_level; - dev_priv->cdclk.table = bxt_cdclk_table; + if (IS_GEMINILAKE(dev_priv)) + dev_priv->cdclk.table = glk_cdclk_table; + else + dev_priv->cdclk.table = bxt_cdclk_table; } else if (IS_GEN9_BC(dev_priv)) { dev_priv->display.set_cdclk = skl_set_cdclk; dev_priv->display.modeset_calc_cdclk = skl_modeset_calc_cdclk; From c26a058680dcf3140b6c4e918a16074a5e739b95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 10 Sep 2019 15:13:47 +0300 Subject: [PATCH 146/331] drm/i915: Use a high priority wq for nonblocking plane updates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit system_unbound_wq can't keep up sometimes and we get dropped frames. Switch to a high priority variant. Reported-by: Heinrich Fink Tested-by: Heinrich Fink Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190910121347.22958-1-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/display/intel_display.c | 6 +++++- drivers/gpu/drm/i915/i915_drv.h | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 3b5275ab66cf0..2668744007b35 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -14272,7 +14272,7 @@ static int intel_atomic_commit(struct drm_device *dev, if (nonblock && state->modeset) { queue_work(dev_priv->modeset_wq, &state->base.commit_work); } else if (nonblock) { - queue_work(system_unbound_wq, &state->base.commit_work); + queue_work(dev_priv->flip_wq, &state->base.commit_work); } else { if (state->modeset) flush_workqueue(dev_priv->modeset_wq); @@ -16181,6 +16181,8 @@ int intel_modeset_init(struct drm_device *dev) int ret; dev_priv->modeset_wq = alloc_ordered_workqueue("i915_modeset", 0); + dev_priv->flip_wq = alloc_workqueue("i915_flip", WQ_HIGHPRI | + WQ_UNBOUND, WQ_UNBOUND_MAX_ACTIVE); drm_mode_config_init(dev); @@ -17139,6 +17141,7 @@ void intel_modeset_driver_remove(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); + flush_workqueue(dev_priv->flip_wq); flush_workqueue(dev_priv->modeset_wq); flush_work(&dev_priv->atomic_helper.free_work); @@ -17175,6 +17178,7 @@ void intel_modeset_driver_remove(struct drm_device *dev) intel_gmbus_teardown(dev_priv); + destroy_workqueue(dev_priv->flip_wq); destroy_workqueue(dev_priv->modeset_wq); intel_fbc_cleanup_cfb(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 51d575cca2ac3..2ea11123e933e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1438,6 +1438,8 @@ struct drm_i915_private { /* ordered wq for modesets */ struct workqueue_struct *modeset_wq; + /* unbound hipri wq for page flips/plane updates */ + struct workqueue_struct *flip_wq; /* Display functions */ struct drm_i915_display_funcs display; From cfcbfdd8fe140af699b538a42cf0dd4933885fe5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 8 Jul 2019 15:53:09 +0300 Subject: [PATCH 147/331] drm/i915: Remove pointless planes_changed=true assignment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit i915 doesn't use the crtc_state->plane_changed flag for anything, so setting it is pointless. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190708125325.16576-4-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy --- drivers/gpu/drm/i915/display/intel_atomic.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c index d3fb75bb9eb1d..698802da07b73 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic.c +++ b/drivers/gpu/drm/i915/display/intel_atomic.c @@ -378,13 +378,6 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, plane->base.id); return PTR_ERR(state); } - - /* - * the plane is added after plane checks are run, - * but since this plane is unchanged just do the - * minimum required validation. - */ - crtc_state->base.planes_changed = true; } intel_plane = to_intel_plane(plane); From 7cb8468bbede8cac0d9ed69b2549788ecaa6b558 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 11 Sep 2019 13:57:17 +0100 Subject: [PATCH 148/331] drm/i915/tgl: Disable read-only ppgtt support The same read-only affliction as befell Icelake is affecting Tigerlake. Disable the read-only support as clearly it was not fixed. Testcase: igt/i915_selftests/live_gem_context References: 3936867dbc1e ("drm/i915: Disable read only ppgtt support for gen11") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190911125717.28997-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index e4f66bfe74c29..a09a9b62afbe5 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1486,8 +1486,10 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915) * * Gen11 has HSDES#:1807136187 unresolved. Disable ro support * for now. + * + * Gen12 has inherited the same read-only fault issue from gen11. */ - ppgtt->vm.has_read_only = INTEL_GEN(i915) != 11; + ppgtt->vm.has_read_only = !IS_GEN_RANGE(i915, 11, 12); /* There are only few exceptions for gen >=6. chv and bxt. * And we are not sure about the latter so play safe for now. From 023a125d1dded8e1c21bd9974373b697544ba18d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 11 Sep 2019 12:46:55 +0100 Subject: [PATCH 149/331] drm/i915: Squeeze iommu status into debugfs/i915_capabilities There's no easy way of checking whether iommu is enabled for the GPU (you can grep dmesg if you know the device, or you can grep i915_gpu_info if that's available). We do have a central i915_capabilities with the intent of listing such pertinent information, so add the iommu status. Suggested-by: Martin Peres Signed-off-by: Chris Wilson Cc: Martin Peres Cc: Tomi Sarvela Cc: Mika Kuoppala Acked-by: Martin Peres Link: https://patchwork.freedesktop.org/patch/msgid/20190911114655.9254-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 708855e051b5c..e5835337f0224 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -61,11 +61,18 @@ static int i915_capabilities(struct seq_file *m, void *data) struct drm_i915_private *dev_priv = node_to_i915(m->private); const struct intel_device_info *info = INTEL_INFO(dev_priv); struct drm_printer p = drm_seq_file_printer(m); + const char *msg; seq_printf(m, "gen: %d\n", INTEL_GEN(dev_priv)); seq_printf(m, "platform: %s\n", intel_platform_name(info->platform)); seq_printf(m, "pch: %d\n", INTEL_PCH_TYPE(dev_priv)); + msg = "n/a"; +#ifdef CONFIG_INTEL_IOMMU + msg = enableddisabled(intel_iommu_gfx_mapped); +#endif + seq_printf(m, "iommu: %s\n", msg); + intel_device_info_dump_flags(info, &p); intel_device_info_dump_runtime(RUNTIME_INFO(dev_priv), &p); intel_driver_caps_print(&dev_priv->caps, &p); From 249778704c01384d76984d83e6d6377ac96b2cc4 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 11 Sep 2019 12:26:08 +0300 Subject: [PATCH 150/331] drm/i915: add INTEL_NUM_PIPES() and use it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Abstract away direct access to ->num_pipes to allow further refactoring. No functional changes. Cc: Chris Wilson Cc: José Roberto de Souza Cc: Ville Syrjälä Reviewed-by: José Roberto de Souza Acked-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190911092608.13009-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 12 ++++++------ drivers/gpu/drm/i915/display/intel_display.h | 4 ++-- drivers/gpu/drm/i915/display/intel_lpe_audio.c | 2 +- drivers/gpu/drm/i915/i915_drv.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 4 +++- drivers/gpu/drm/i915/intel_pm.c | 6 +++--- 6 files changed, 16 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 2668744007b35..a19f8c73f2e0a 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -7190,7 +7190,7 @@ static int ironlake_check_fdi_lanes(struct drm_device *dev, enum pipe pipe, } } - if (INTEL_INFO(dev_priv)->num_pipes == 2) + if (INTEL_NUM_PIPES(dev_priv) == 2) return 0; /* Ivybridge 3 pipe is really complicated */ @@ -9574,7 +9574,7 @@ static void ironlake_compute_dpll(struct intel_crtc *crtc, * clear if it''s a win or loss power wise. No point in doing * this on ILK at all since it has a fixed DPLL<->pipe mapping. */ - if (INTEL_INFO(dev_priv)->num_pipes == 3 && + if (INTEL_NUM_PIPES(dev_priv) == 3 && intel_crtc_has_type(crtc_state, INTEL_OUTPUT_ANALOG)) dpll |= DPLL_SDVO_HIGH_SPEED; @@ -13899,7 +13899,7 @@ static void skl_commit_modeset_enables(struct intel_atomic_state *state) if (skl_ddb_allocation_overlaps(&new_crtc_state->wm.skl.ddb, entries, - INTEL_INFO(dev_priv)->num_pipes, i)) + INTEL_NUM_PIPES(dev_priv), i)) continue; updated |= cmask; @@ -16258,8 +16258,8 @@ int intel_modeset_init(struct drm_device *dev) } DRM_DEBUG_KMS("%d display pipe%s available.\n", - INTEL_INFO(dev_priv)->num_pipes, - INTEL_INFO(dev_priv)->num_pipes > 1 ? "s" : ""); + INTEL_NUM_PIPES(dev_priv), + INTEL_NUM_PIPES(dev_priv) > 1 ? "s" : ""); for_each_pipe(dev_priv, pipe) { ret = intel_crtc_init(dev_priv, pipe); @@ -17352,7 +17352,7 @@ intel_display_print_error_state(struct drm_i915_error_state_buf *m, if (!error) return; - err_printf(m, "Num Pipes: %d\n", INTEL_INFO(dev_priv)->num_pipes); + err_printf(m, "Num Pipes: %d\n", INTEL_NUM_PIPES(dev_priv)); if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) err_printf(m, "PWR_WELL_CTL2: %08x\n", error->power_well_driver); diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index 33fd523c4622b..f4ddde1716553 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -307,10 +307,10 @@ enum phy_fia { }; #define for_each_pipe(__dev_priv, __p) \ - for ((__p) = 0; (__p) < INTEL_INFO(__dev_priv)->num_pipes; (__p)++) + for ((__p) = 0; (__p) < INTEL_NUM_PIPES(__dev_priv); (__p)++) #define for_each_pipe_masked(__dev_priv, __p, __mask) \ - for ((__p) = 0; (__p) < INTEL_INFO(__dev_priv)->num_pipes; (__p)++) \ + for ((__p) = 0; (__p) < INTEL_NUM_PIPES(__dev_priv); (__p)++) \ for_each_if((__mask) & BIT(__p)) #define for_each_cpu_transcoder_masked(__dev_priv, __t, __mask) \ diff --git a/drivers/gpu/drm/i915/display/intel_lpe_audio.c b/drivers/gpu/drm/i915/display/intel_lpe_audio.c index b19800b584425..0b67f7887cd0b 100644 --- a/drivers/gpu/drm/i915/display/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/display/intel_lpe_audio.c @@ -114,7 +114,7 @@ lpe_audio_platdev_create(struct drm_i915_private *dev_priv) pinfo.size_data = sizeof(*pdata); pinfo.dma_mask = DMA_BIT_MASK(32); - pdata->num_pipes = INTEL_INFO(dev_priv)->num_pipes; + pdata->num_pipes = INTEL_NUM_PIPES(dev_priv); pdata->num_ports = IS_CHERRYVIEW(dev_priv) ? 3 : 2; /* B,C,D or B,C */ pdata->port[0].pipe = -1; pdata->port[1].pipe = -1; diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 8966672b0294c..c52ef2223580f 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -340,7 +340,7 @@ static int i915_driver_modeset_probe(struct drm_device *dev) if (HAS_DISPLAY(dev_priv)) { ret = drm_vblank_init(&dev_priv->drm, - INTEL_INFO(dev_priv)->num_pipes); + INTEL_NUM_PIPES(dev_priv)); if (ret) goto out; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2ea11123e933e..84fb0245cf62b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2188,7 +2188,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define GT_FREQUENCY_MULTIPLIER 50 #define GEN9_FREQ_SCALER 3 -#define HAS_DISPLAY(dev_priv) (INTEL_INFO(dev_priv)->num_pipes > 0) +#define INTEL_NUM_PIPES(dev_priv) (INTEL_INFO(dev_priv)->num_pipes) + +#define HAS_DISPLAY(dev_priv) (INTEL_NUM_PIPES(dev_priv) > 0) static inline bool intel_vtd_active(void) { diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 54a8b7705bc68..d0ceb272551f6 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1909,7 +1909,7 @@ static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state) for (level = 0; level < wm_state->num_levels; level++) { const struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; - const int sr_fifo_size = INTEL_INFO(dev_priv)->num_pipes * 512 - 1; + const int sr_fifo_size = INTEL_NUM_PIPES(dev_priv) * 512 - 1; if (!vlv_raw_crtc_wm_is_valid(crtc_state, level)) break; @@ -2648,7 +2648,7 @@ static unsigned int ilk_plane_wm_max(const struct drm_i915_private *dev_priv, /* HSW allows LP1+ watermarks even with multiple pipes */ if (level == 0 || config->num_pipes_active > 1) { - fifo_size /= INTEL_INFO(dev_priv)->num_pipes; + fifo_size /= INTEL_NUM_PIPES(dev_priv); /* * For some reason the non self refresh @@ -9733,7 +9733,7 @@ void intel_init_pm(struct drm_i915_private *dev_priv) dev_priv->display.update_wm = i9xx_update_wm; dev_priv->display.get_fifo_size = i9xx_get_fifo_size; } else if (IS_GEN(dev_priv, 2)) { - if (INTEL_INFO(dev_priv)->num_pipes == 1) { + if (INTEL_NUM_PIPES(dev_priv) == 1) { dev_priv->display.update_wm = i845_update_wm; dev_priv->display.get_fifo_size = i845_get_fifo_size; } else { From 74689ddfb7574e2bf08c7206a4ab0dff04978b05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 11 Sep 2019 16:31:26 +0300 Subject: [PATCH 151/331] drm/i915: Fix cdclk bypass freq readout for tgl/bxt/glk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On tgl/bxt/glk the cdclk bypass frequency depends on the PLL reference clock. So let's read out the ref clock before we try to compute the bypass clock. Cc: Matt Roper Fixes: 71dc367e2bc3 ("drm/i915: Consolidate bxt/cnl/icl cdclk readout") Signed-off-by: Ville Syrjälä Reviewed-by: Matt Roper Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190911133129.27466-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_cdclk.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 13779b6029f5d..6fc8e3c0cfba0 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -1351,6 +1351,8 @@ static void bxt_get_cdclk(struct drm_i915_private *dev_priv, u32 divider; int div; + bxt_de_pll_readout(dev_priv, cdclk_state); + if (INTEL_GEN(dev_priv) >= 12) cdclk_state->bypass = cdclk_state->ref / 2; else if (INTEL_GEN(dev_priv) >= 11) @@ -1358,7 +1360,6 @@ static void bxt_get_cdclk(struct drm_i915_private *dev_priv, else cdclk_state->bypass = cdclk_state->ref; - bxt_de_pll_readout(dev_priv, cdclk_state); if (cdclk_state->vco == 0) { cdclk_state->cdclk = cdclk_state->bypass; goto out; From 0a12e437049786b9c33542d16b6a03f46fab46f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 11 Sep 2019 16:31:27 +0300 Subject: [PATCH 152/331] drm/i915: Fix CD2X pipe select masking during cdclk sanitation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We're forgetting to mask off all three pipe select bits from the CDCLK_CTL value on icl+ which may lead to the extra bit being left in. That will cause us to consider the current hardware cdclk state as invalid, and we proceed to sanitize it even though the hardware may have active pipes and whatnot. Fix up the mask so we get rid of all three pipe select bits and thus hopefully no longer sanitize cdclk when it's already correctly programmed. Cc: Matt Roper Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111641 Fixes: 0c1279b58fc7 ("drm/i915: Consolidate {bxt,cnl,icl}_init_cdclk") Signed-off-by: Ville Syrjälä Reviewed-by: Matt Roper Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190911133129.27466-2-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_cdclk.c | 42 ++++++++++++---------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 6fc8e3c0cfba0..d838c34d9cac9 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -1464,6 +1464,26 @@ static void cnl_cdclk_pll_enable(struct drm_i915_private *dev_priv, int vco) dev_priv->cdclk.hw.vco = vco; } +static u32 bxt_cdclk_cd2x_pipe(struct drm_i915_private *dev_priv, enum pipe pipe) +{ + if (INTEL_GEN(dev_priv) >= 12) { + if (pipe == INVALID_PIPE) + return TGL_CDCLK_CD2X_PIPE_NONE; + else + return TGL_CDCLK_CD2X_PIPE(pipe); + } else if (INTEL_GEN(dev_priv) >= 11) { + if (pipe == INVALID_PIPE) + return ICL_CDCLK_CD2X_PIPE_NONE; + else + return ICL_CDCLK_CD2X_PIPE(pipe); + } else { + if (pipe == INVALID_PIPE) + return BXT_CDCLK_CD2X_PIPE_NONE; + else + return BXT_CDCLK_CD2X_PIPE(pipe); + } +} + static void bxt_set_cdclk(struct drm_i915_private *dev_priv, const struct intel_cdclk_state *cdclk_state, enum pipe pipe) @@ -1534,24 +1554,8 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, bxt_de_pll_enable(dev_priv, vco); } - val = divider | skl_cdclk_decimal(cdclk); - - if (INTEL_GEN(dev_priv) >= 12) { - if (pipe == INVALID_PIPE) - val |= TGL_CDCLK_CD2X_PIPE_NONE; - else - val |= TGL_CDCLK_CD2X_PIPE(pipe); - } else if (INTEL_GEN(dev_priv) >= 11) { - if (pipe == INVALID_PIPE) - val |= ICL_CDCLK_CD2X_PIPE_NONE; - else - val |= ICL_CDCLK_CD2X_PIPE(pipe); - } else { - if (pipe == INVALID_PIPE) - val |= BXT_CDCLK_CD2X_PIPE_NONE; - else - val |= BXT_CDCLK_CD2X_PIPE(pipe); - } + val = divider | skl_cdclk_decimal(cdclk) | + bxt_cdclk_cd2x_pipe(dev_priv, pipe); /* * Disable SSA Precharge when CD clock frequency < 500 MHz, @@ -1620,7 +1624,7 @@ static void bxt_sanitize_cdclk(struct drm_i915_private *dev_priv) * dividers both synching to an active pipe, or asynchronously * (PIPE_NONE). */ - cdctl &= ~BXT_CDCLK_CD2X_PIPE_NONE; + cdctl &= ~bxt_cdclk_cd2x_pipe(dev_priv, INVALID_PIPE); /* Make sure this is a legal cdclk value for the platform */ cdclk = bxt_calc_cdclk(dev_priv, dev_priv->cdclk.hw.cdclk); From 502d1c04f53c787dfd6268578393d340cf614cde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 11 Sep 2019 16:31:28 +0300 Subject: [PATCH 153/331] drm/i915: Reuse cnl_modeset_calc_cdclk() on icl+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cnl and icl .modeset_calc_cdclk() functions are identical. Drop one copy. Signed-off-by: Ville Syrjälä Reviewed-by: Matt Roper Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190911133129.27466-3-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_cdclk.c | 37 ++-------------------- 1 file changed, 2 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index d838c34d9cac9..3bc70472fb5fc 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2227,39 +2227,6 @@ static int cnl_modeset_calc_cdclk(struct intel_atomic_state *state) cdclk = bxt_calc_cdclk(dev_priv, min_cdclk); vco = bxt_calc_cdclk_pll_vco(dev_priv, cdclk); - state->cdclk.logical.vco = vco; - state->cdclk.logical.cdclk = cdclk; - state->cdclk.logical.voltage_level = - max(cnl_calc_voltage_level(cdclk), - cnl_compute_min_voltage_level(state)); - - if (!state->active_pipes) { - cdclk = bxt_calc_cdclk(dev_priv, state->cdclk.force_min_cdclk); - vco = bxt_calc_cdclk_pll_vco(dev_priv, cdclk); - - state->cdclk.actual.vco = vco; - state->cdclk.actual.cdclk = cdclk; - state->cdclk.actual.voltage_level = - cnl_calc_voltage_level(cdclk); - } else { - state->cdclk.actual = state->cdclk.logical; - } - - return 0; -} - -static int icl_modeset_calc_cdclk(struct intel_atomic_state *state) -{ - struct drm_i915_private *dev_priv = to_i915(state->base.dev); - int min_cdclk, cdclk, vco; - - min_cdclk = intel_compute_min_cdclk(state); - if (min_cdclk < 0) - return min_cdclk; - - cdclk = bxt_calc_cdclk(dev_priv, min_cdclk); - vco = bxt_calc_cdclk_pll_vco(dev_priv, cdclk); - state->cdclk.logical.vco = vco; state->cdclk.logical.cdclk = cdclk; state->cdclk.logical.voltage_level = @@ -2499,12 +2466,12 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) { if (IS_ELKHARTLAKE(dev_priv)) { dev_priv->display.set_cdclk = bxt_set_cdclk; - dev_priv->display.modeset_calc_cdclk = icl_modeset_calc_cdclk; + dev_priv->display.modeset_calc_cdclk = cnl_modeset_calc_cdclk; dev_priv->display.calc_voltage_level = ehl_calc_voltage_level; dev_priv->cdclk.table = icl_cdclk_table; } else if (INTEL_GEN(dev_priv) >= 11) { dev_priv->display.set_cdclk = bxt_set_cdclk; - dev_priv->display.modeset_calc_cdclk = icl_modeset_calc_cdclk; + dev_priv->display.modeset_calc_cdclk = cnl_modeset_calc_cdclk; dev_priv->display.calc_voltage_level = icl_calc_voltage_level; dev_priv->cdclk.table = icl_cdclk_table; } else if (IS_CANNONLAKE(dev_priv)) { From 933122cc7cd26e67dfc7975f821033e89f766963 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 11 Sep 2019 16:31:29 +0300 Subject: [PATCH 154/331] drm/i915: Remove duplicated bxt/cnl/icl .modeset_calc_cdclk() funcs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reuse the same .modeset_calc_cdclk() function for all bxt+. The only difference in between the cnl/icl and the bxt variants is the call to cnl_compute_min_voltage_level(). We can do that call just fine on older platforms since they leave min_voltage_level[] zeroed. Let's rename the function to bxt_compute_min_voltage_level() just so it stays consistent with the rest of the naming scheme. Signed-off-by: Ville Syrjälä Reviewed-by: Matt Roper Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190911133129.27466-4-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_cdclk.c | 46 +++++----------------- 1 file changed, 9 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 3bc70472fb5fc..ea3f75c72fe84 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2017,6 +2017,10 @@ static int intel_compute_min_cdclk(struct intel_atomic_state *state) } /* + * Account for port clock min voltage level requirements. + * This only really does something on CNL+ but can be + * called on earlier platforms as well. + * * Note that this functions assumes that 0 is * the lowest voltage value, and higher values * correspond to increasingly higher voltages. @@ -2025,7 +2029,7 @@ static int intel_compute_min_cdclk(struct intel_atomic_state *state) * future platforms this code will need to be * adjusted. */ -static u8 cnl_compute_min_voltage_level(struct intel_atomic_state *state) +static u8 bxt_compute_min_voltage_level(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); struct intel_crtc *crtc; @@ -2195,43 +2199,11 @@ static int bxt_modeset_calc_cdclk(struct intel_atomic_state *state) cdclk = bxt_calc_cdclk(dev_priv, min_cdclk); vco = bxt_calc_cdclk_pll_vco(dev_priv, cdclk); - state->cdclk.logical.vco = vco; - state->cdclk.logical.cdclk = cdclk; - state->cdclk.logical.voltage_level = - dev_priv->display.calc_voltage_level(cdclk); - - if (!state->active_pipes) { - cdclk = bxt_calc_cdclk(dev_priv, state->cdclk.force_min_cdclk); - vco = bxt_calc_cdclk_pll_vco(dev_priv, cdclk); - - state->cdclk.actual.vco = vco; - state->cdclk.actual.cdclk = cdclk; - state->cdclk.actual.voltage_level = - dev_priv->display.calc_voltage_level(cdclk); - } else { - state->cdclk.actual = state->cdclk.logical; - } - - return 0; -} - -static int cnl_modeset_calc_cdclk(struct intel_atomic_state *state) -{ - struct drm_i915_private *dev_priv = to_i915(state->base.dev); - int min_cdclk, cdclk, vco; - - min_cdclk = intel_compute_min_cdclk(state); - if (min_cdclk < 0) - return min_cdclk; - - cdclk = bxt_calc_cdclk(dev_priv, min_cdclk); - vco = bxt_calc_cdclk_pll_vco(dev_priv, cdclk); - state->cdclk.logical.vco = vco; state->cdclk.logical.cdclk = cdclk; state->cdclk.logical.voltage_level = max(dev_priv->display.calc_voltage_level(cdclk), - cnl_compute_min_voltage_level(state)); + bxt_compute_min_voltage_level(state)); if (!state->active_pipes) { cdclk = bxt_calc_cdclk(dev_priv, state->cdclk.force_min_cdclk); @@ -2466,17 +2438,17 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) { if (IS_ELKHARTLAKE(dev_priv)) { dev_priv->display.set_cdclk = bxt_set_cdclk; - dev_priv->display.modeset_calc_cdclk = cnl_modeset_calc_cdclk; + dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk; dev_priv->display.calc_voltage_level = ehl_calc_voltage_level; dev_priv->cdclk.table = icl_cdclk_table; } else if (INTEL_GEN(dev_priv) >= 11) { dev_priv->display.set_cdclk = bxt_set_cdclk; - dev_priv->display.modeset_calc_cdclk = cnl_modeset_calc_cdclk; + dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk; dev_priv->display.calc_voltage_level = icl_calc_voltage_level; dev_priv->cdclk.table = icl_cdclk_table; } else if (IS_CANNONLAKE(dev_priv)) { dev_priv->display.set_cdclk = bxt_set_cdclk; - dev_priv->display.modeset_calc_cdclk = cnl_modeset_calc_cdclk; + dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk; dev_priv->display.calc_voltage_level = cnl_calc_voltage_level; dev_priv->cdclk.table = cnl_cdclk_table; } else if (IS_GEN9_LP(dev_priv)) { From e3cb653d5cc42c2073ad174ae1130dc9e39266e9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 11 Sep 2019 18:59:26 +0100 Subject: [PATCH 155/331] drm/i915: Disable FBC if BIOS reserved memory (stolen) is unavailable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The FBC requires a couple of contiguous buffers, which we allocate from stolen memory. If stolen memory is unavailable, we cannot allocate those buffers and so cannot support FBC. Mark it so. Signed-off-by: Chris Wilson Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190911175926.31365-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/display/intel_fbc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index dc34b23e23201..3111ecaeabd0e 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -1320,6 +1320,9 @@ void intel_fbc_init(struct drm_i915_private *dev_priv) fbc->enabled = false; fbc->active = false; + if (!drm_mm_initialized(&dev_priv->mm.stolen)) + mkwrite_device_info(dev_priv)->display.has_fbc = false; + if (need_fbc_vtd_wa(dev_priv)) mkwrite_device_info(dev_priv)->display.has_fbc = false; From 582a6f90aa0d9103ab834b3a48a5bb7e4d07cac6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 12 Sep 2019 10:29:32 +0100 Subject: [PATCH 156/331] drm/i915/execlists: Add a paranoid flush of the CSB pointers upon reset After a GPU reset, we need to drain all the CS events so that we have an accurate picture of the execlists state at the time of the reset. Be paranoid and force a read of the CSB write pointer from memory. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190912092933.4729-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index dcdf7cf66e7ea..dbc90da2341ab 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -2359,6 +2359,10 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) struct i915_request *rq; u32 *regs; + mb(); /* paranoia: read the CSB pointers from after the reset */ + clflush(execlists->csb_write); + mb(); + process_csb(engine); /* drain preemption events */ /* Following the reset, we need to reload the CSB read/write pointers */ From a17592effdc16f3a51ef9c1dda30fe5c6d668263 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 12 Sep 2019 10:29:33 +0100 Subject: [PATCH 157/331] drm/i915/execlists: Ensure the context is reloaded after a GPU reset After we manipulate the context to allow replay after a GPU reset, force that context to be reloaded. This should be a layer of paranoia, for if the GPU was reset, the context will no longer be resident! Signed-off-by: Chris Wilson Cc: Mika Kuoppala Acked-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190912092933.4729-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index dbc90da2341ab..47d766ccea71e 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -2445,6 +2445,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) intel_ring_update_space(ce->ring); __execlists_reset_reg_state(ce, engine); __execlists_update_reg_state(ce, engine); + ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */ mutex_release(&ce->pin_mutex.dep_map, 0, _THIS_IP_); unwind: From 54fc577d90d01a28dbfccd277aa3de4d4abaf1ff Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 11 Sep 2019 17:07:30 +0100 Subject: [PATCH 158/331] drm/i915/pmu: Skip busyness sampling when and where not needed Since d0aa694b9239 ("drm/i915/pmu: Always sample an active ringbuffer") the cost of sampling the engine state on execlists platforms became a little bit higher when both engine busyness and one of the wait states are being monitored. (Previously the busyness sampling on legacy platforms was done via seqno comparison so there was no cost of mmio read.) We can avoid that by skipping busyness sampling when engine supports software busy stats and so avoid the cost of potential mmio read and sample accumulation. Signed-off-by: Tvrtko Ursulin Cc: Chris Wilson Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190911160730.22687-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_pmu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 8e251e719390f..623ad32303a17 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -194,6 +194,10 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns) if (val & RING_WAIT_SEMAPHORE) add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns); + /* No need to sample when busy stats are supported. */ + if (intel_engine_supports_stats(engine)) + goto skip; + /* * While waiting on a semaphore or event, MI_MODE reports the * ring as idle. However, previously using the seqno, and with From 8d8b00318593e2852aef19b92f99851d50a203fc Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 11 Sep 2019 23:29:08 +0300 Subject: [PATCH 159/331] drm/i915: convert device info num_pipes to pipe_mask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace device info number of pipes with a bit mask of available pipes. This will prove handy in the future. There's still a bunch of future work to do to actually allow a non-consecutive mask of pipes, but it's a start. No functional changes. Cc: Chris Wilson Cc: José Roberto de Souza Cc: Ville Syrjälä Reviewed-by: José Roberto de Souza Acked-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190911202908.19631-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- drivers/gpu/drm/i915/i915_pci.c | 24 ++++++++++++------------ drivers/gpu/drm/i915/intel_device_info.c | 10 +++++----- drivers/gpu/drm/i915/intel_device_info.h | 2 +- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 84fb0245cf62b..bf600888b3f10 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2188,9 +2188,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define GT_FREQUENCY_MULTIPLIER 50 #define GEN9_FREQ_SCALER 3 -#define INTEL_NUM_PIPES(dev_priv) (INTEL_INFO(dev_priv)->num_pipes) +#define INTEL_NUM_PIPES(dev_priv) (hweight8(INTEL_INFO(dev_priv)->pipe_mask)) -#define HAS_DISPLAY(dev_priv) (INTEL_NUM_PIPES(dev_priv) > 0) +#define HAS_DISPLAY(dev_priv) (INTEL_INFO(dev_priv)->pipe_mask != 0) static inline bool intel_vtd_active(void) { diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index b3cc8560696b6..6981162764413 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -147,7 +147,7 @@ #define I830_FEATURES \ GEN(2), \ .is_mobile = 1, \ - .num_pipes = 2, \ + .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), \ .display.has_overlay = 1, \ .display.cursor_needs_physical = 1, \ .display.overlay_needs_physical = 1, \ @@ -165,7 +165,7 @@ #define I845_FEATURES \ GEN(2), \ - .num_pipes = 1, \ + .pipe_mask = BIT(PIPE_A), \ .display.has_overlay = 1, \ .display.overlay_needs_physical = 1, \ .display.has_gmch = 1, \ @@ -203,7 +203,7 @@ static const struct intel_device_info intel_i865g_info = { #define GEN3_FEATURES \ GEN(3), \ - .num_pipes = 2, \ + .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), \ .display.has_gmch = 1, \ .gpu_reset_clobbers_display = true, \ .engine_mask = BIT(RCS0), \ @@ -287,7 +287,7 @@ static const struct intel_device_info intel_pineview_m_info = { #define GEN4_FEATURES \ GEN(4), \ - .num_pipes = 2, \ + .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), \ .display.has_hotplug = 1, \ .display.has_gmch = 1, \ .gpu_reset_clobbers_display = true, \ @@ -337,7 +337,7 @@ static const struct intel_device_info intel_gm45_info = { #define GEN5_FEATURES \ GEN(5), \ - .num_pipes = 2, \ + .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), \ .display.has_hotplug = 1, \ .engine_mask = BIT(RCS0) | BIT(VCS0), \ .has_snoop = true, \ @@ -363,7 +363,7 @@ static const struct intel_device_info intel_ironlake_m_info = { #define GEN6_FEATURES \ GEN(6), \ - .num_pipes = 2, \ + .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), \ .display.has_hotplug = 1, \ .display.has_fbc = 1, \ .engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0), \ @@ -411,7 +411,7 @@ static const struct intel_device_info intel_sandybridge_m_gt2_info = { #define GEN7_FEATURES \ GEN(7), \ - .num_pipes = 3, \ + .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C), \ .display.has_hotplug = 1, \ .display.has_fbc = 1, \ .engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0), \ @@ -462,7 +462,7 @@ static const struct intel_device_info intel_ivybridge_q_info = { GEN7_FEATURES, PLATFORM(INTEL_IVYBRIDGE), .gt = 2, - .num_pipes = 0, /* legal, last one wins */ + .pipe_mask = 0, /* legal, last one wins */ .has_l3_dpf = 1, }; @@ -470,7 +470,7 @@ static const struct intel_device_info intel_valleyview_info = { PLATFORM(INTEL_VALLEYVIEW), GEN(7), .is_lp = 1, - .num_pipes = 2, + .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), .has_runtime_pm = 1, .has_rc6 = 1, .has_rps = true, @@ -560,7 +560,7 @@ static const struct intel_device_info intel_broadwell_gt3_info = { static const struct intel_device_info intel_cherryview_info = { PLATFORM(INTEL_CHERRYVIEW), GEN(8), - .num_pipes = 3, + .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C), .display.has_hotplug = 1, .is_lp = 1, .engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0), @@ -631,7 +631,7 @@ static const struct intel_device_info intel_skylake_gt4_info = { .is_lp = 1, \ .display.has_hotplug = 1, \ .engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0), \ - .num_pipes = 3, \ + .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C), \ .has_64bit_reloc = 1, \ .display.has_ddi = 1, \ .has_fpga_dbg = 1, \ @@ -792,7 +792,7 @@ static const struct intel_device_info intel_elkhartlake_info = { static const struct intel_device_info intel_tigerlake_12_info = { GEN12_FEATURES, PLATFORM(INTEL_TIGERLAKE), - .num_pipes = 4, + .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D), .require_force_probe = 1, .display.has_modular_fia = 1, .engine_mask = diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index d9b5baaef5d0e..50b05a5de53b0 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -896,7 +896,7 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) if (i915_modparams.disable_display) { DRM_INFO("Display disabled (module parameter)\n"); - info->num_pipes = 0; + info->pipe_mask = 0; } else if (HAS_DISPLAY(dev_priv) && (IS_GEN_RANGE(dev_priv, 7, 8)) && HAS_PCH_SPLIT(dev_priv)) { @@ -917,14 +917,14 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) (HAS_PCH_CPT(dev_priv) && !(sfuse_strap & SFUSE_STRAP_FUSE_LOCK))) { DRM_INFO("Display fused off, disabling\n"); - info->num_pipes = 0; + info->pipe_mask = 0; } else if (fuse_strap & IVB_PIPE_C_DISABLE) { DRM_INFO("PipeC fused off\n"); - info->num_pipes -= 1; + info->pipe_mask &= ~BIT(PIPE_C); } } else if (HAS_DISPLAY(dev_priv) && INTEL_GEN(dev_priv) >= 9) { u32 dfsm = I915_READ(SKL_DFSM); - u8 enabled_mask = BIT(info->num_pipes) - 1; + u8 enabled_mask = info->pipe_mask; if (dfsm & SKL_DFSM_PIPE_A_DISABLE) enabled_mask &= ~BIT(PIPE_A); @@ -945,7 +945,7 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) DRM_ERROR("invalid pipe fuse configuration: enabled_mask=0x%x\n", enabled_mask); else - info->num_pipes = hweight8(enabled_mask); + info->pipe_mask = enabled_mask; } /* Initialize slice/subslice/EU info */ diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 92e0c2e0954c5..d4c288860aed2 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -161,7 +161,7 @@ struct intel_device_info { u32 display_mmio_offset; - u8 num_pipes; + u8 pipe_mask; #define DEFINE_FLAG(name) u8 name:1 DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG); From 16ffe73c186b7897a1a6888ffb4949a8b4026624 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 12 Sep 2019 13:48:13 +0100 Subject: [PATCH 160/331] drm/i915/pmu: Use GT parked for estimating RC6 while asleep As we track when we put the GT device to sleep upon idling, we can use that callback to sample the current rc6 counters and record the timestamp for estimating samples after that point while asleep. v2: Stick to using ktime_t v3: Track user_wakerefs that interfere with the new intel_gt_pm_wait_for_idle v4: No need for parked/unparked estimation if !CONFIG_PM v5: Keep timer park/unpark logic as was v6: Refactor duplicated estimate/update rc6 logic v7: Pull intel_get_pm_get_if_awake() out from the pmu->lock. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105010 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190912124813.19225-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_pm.c | 22 +++ drivers/gpu/drm/i915/gt/intel_gt_types.h | 1 + drivers/gpu/drm/i915/i915_debugfs.c | 22 +-- drivers/gpu/drm/i915/i915_pmu.c | 242 +++++++++++++---------- drivers/gpu/drm/i915/i915_pmu.h | 4 +- 5 files changed, 170 insertions(+), 121 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 3bd764104d41c..a11ad4d914ca8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -141,6 +141,24 @@ bool i915_gem_load_power_context(struct drm_i915_private *i915) return switch_to_kernel_context_sync(&i915->gt); } +static void user_forcewake(struct intel_gt *gt, bool suspend) +{ + int count = atomic_read(>->user_wakeref); + + /* Inside suspend/resume so single threaded, no races to worry about. */ + if (likely(!count)) + return; + + intel_gt_pm_get(gt); + if (suspend) { + GEM_BUG_ON(count > atomic_read(>->wakeref.count)); + atomic_sub(count, >->wakeref.count); + } else { + atomic_add(count, >->wakeref.count); + } + intel_gt_pm_put(gt); +} + void i915_gem_suspend(struct drm_i915_private *i915) { GEM_TRACE("\n"); @@ -148,6 +166,8 @@ void i915_gem_suspend(struct drm_i915_private *i915) intel_wakeref_auto(&i915->ggtt.userfault_wakeref, 0); flush_workqueue(i915->wq); + user_forcewake(&i915->gt, true); + mutex_lock(&i915->drm.struct_mutex); /* @@ -259,6 +279,8 @@ void i915_gem_resume(struct drm_i915_private *i915) if (!i915_gem_load_power_context(i915)) goto err_wedged; + user_forcewake(&i915->gt, false); + out_unlock: intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); mutex_unlock(&i915->drm.struct_mutex); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index dc295c196d11c..3039cef64b112 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -50,6 +50,7 @@ struct intel_gt { } timelines; struct intel_wakeref wakeref; + atomic_t user_wakeref; struct list_head closed_vma; spinlock_t closed_lock; /* guards the list of closed_vma */ diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index e5835337f0224..f3ae525b77c07 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3995,13 +3995,12 @@ static int i915_sseu_status(struct seq_file *m, void *unused) static int i915_forcewake_open(struct inode *inode, struct file *file) { struct drm_i915_private *i915 = inode->i_private; + struct intel_gt *gt = &i915->gt; - if (INTEL_GEN(i915) < 6) - return 0; - - file->private_data = - (void *)(uintptr_t)intel_runtime_pm_get(&i915->runtime_pm); - intel_uncore_forcewake_user_get(&i915->uncore); + atomic_inc(>->user_wakeref); + intel_gt_pm_get(gt); + if (INTEL_GEN(i915) >= 6) + intel_uncore_forcewake_user_get(gt->uncore); return 0; } @@ -4009,13 +4008,12 @@ static int i915_forcewake_open(struct inode *inode, struct file *file) static int i915_forcewake_release(struct inode *inode, struct file *file) { struct drm_i915_private *i915 = inode->i_private; + struct intel_gt *gt = &i915->gt; - if (INTEL_GEN(i915) < 6) - return 0; - - intel_uncore_forcewake_user_put(&i915->uncore); - intel_runtime_pm_put(&i915->runtime_pm, - (intel_wakeref_t)(uintptr_t)file->private_data); + if (INTEL_GEN(i915) >= 6) + intel_uncore_forcewake_user_put(&i915->uncore); + intel_gt_pm_put(gt); + atomic_dec(>->user_wakeref); return 0; } diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 623ad32303a17..3310353890fb6 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -116,22 +116,124 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active) return enable; } -void i915_pmu_gt_parked(struct drm_i915_private *i915) +static u64 __get_rc6(const struct intel_gt *gt) { - struct i915_pmu *pmu = &i915->pmu; + struct drm_i915_private *i915 = gt->i915; + u64 val; - if (!pmu->base.event_init) - return; + val = intel_rc6_residency_ns(i915, + IS_VALLEYVIEW(i915) ? + VLV_GT_RENDER_RC6 : + GEN6_GT_GFX_RC6); + + if (HAS_RC6p(i915)) + val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p); + + if (HAS_RC6pp(i915)) + val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp); + + return val; +} + +#if IS_ENABLED(CONFIG_PM) + +static inline s64 ktime_since(const ktime_t kt) +{ + return ktime_to_ns(ktime_sub(ktime_get(), kt)); +} + +static u64 __pmu_estimate_rc6(struct i915_pmu *pmu) +{ + u64 val; - spin_lock_irq(&pmu->lock); /* - * Signal sampling timer to stop if only engine events are enabled and - * GPU went idle. + * We think we are runtime suspended. + * + * Report the delta from when the device was suspended to now, + * on top of the last known real value, as the approximated RC6 + * counter value. */ - pmu->timer_enabled = pmu_needs_timer(pmu, false); - spin_unlock_irq(&pmu->lock); + val = ktime_since(pmu->sleep_last); + val += pmu->sample[__I915_SAMPLE_RC6].cur; + + pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val; + + return val; +} + +static u64 __pmu_update_rc6(struct i915_pmu *pmu, u64 val) +{ + /* + * If we are coming back from being runtime suspended we must + * be careful not to report a larger value than returned + * previously. + */ + if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) { + pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0; + pmu->sample[__I915_SAMPLE_RC6].cur = val; + } else { + val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur; + } + + return val; +} + +static u64 get_rc6(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + struct i915_pmu *pmu = &i915->pmu; + unsigned long flags; + u64 val; + + val = 0; + if (intel_gt_pm_get_if_awake(gt)) { + val = __get_rc6(gt); + intel_gt_pm_put(gt); + } + + spin_lock_irqsave(&pmu->lock, flags); + + if (val) + val = __pmu_update_rc6(pmu, val); + else + val = __pmu_estimate_rc6(pmu); + + spin_unlock_irqrestore(&pmu->lock, flags); + + return val; +} + +static void park_rc6(struct drm_i915_private *i915) +{ + struct i915_pmu *pmu = &i915->pmu; + + if (pmu->enable & config_enabled_mask(I915_PMU_RC6_RESIDENCY)) + __pmu_update_rc6(pmu, __get_rc6(&i915->gt)); + + pmu->sleep_last = ktime_get(); +} + +static void unpark_rc6(struct drm_i915_private *i915) +{ + struct i915_pmu *pmu = &i915->pmu; + + /* Estimate how long we slept and accumulate that into rc6 counters */ + if (pmu->enable & config_enabled_mask(I915_PMU_RC6_RESIDENCY)) + __pmu_estimate_rc6(pmu); +} + +#else + +static u64 get_rc6(struct intel_gt *gt) +{ + return __get_rc6(gt); } +static void park_rc6(struct drm_i915_private *i915) {} +static void unpark_rc6(struct drm_i915_private *i915) {} + +#endif + static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu) { if (!pmu->timer_enabled && pmu_needs_timer(pmu, true)) { @@ -143,6 +245,26 @@ static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu) } } +void i915_pmu_gt_parked(struct drm_i915_private *i915) +{ + struct i915_pmu *pmu = &i915->pmu; + + if (!pmu->base.event_init) + return; + + spin_lock_irq(&pmu->lock); + + park_rc6(i915); + + /* + * Signal sampling timer to stop if only engine events are enabled and + * GPU went idle. + */ + pmu->timer_enabled = pmu_needs_timer(pmu, false); + + spin_unlock_irq(&pmu->lock); +} + void i915_pmu_gt_unparked(struct drm_i915_private *i915) { struct i915_pmu *pmu = &i915->pmu; @@ -151,10 +273,14 @@ void i915_pmu_gt_unparked(struct drm_i915_private *i915) return; spin_lock_irq(&pmu->lock); + /* * Re-enable sampling timer when GPU goes active. */ __i915_pmu_maybe_start_timer(pmu); + + unpark_rc6(i915); + spin_unlock_irq(&pmu->lock); } @@ -430,104 +556,6 @@ static int i915_pmu_event_init(struct perf_event *event) return 0; } -static u64 __get_rc6(struct intel_gt *gt) -{ - struct drm_i915_private *i915 = gt->i915; - u64 val; - - val = intel_rc6_residency_ns(i915, - IS_VALLEYVIEW(i915) ? - VLV_GT_RENDER_RC6 : - GEN6_GT_GFX_RC6); - - if (HAS_RC6p(i915)) - val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p); - - if (HAS_RC6pp(i915)) - val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp); - - return val; -} - -static u64 get_rc6(struct intel_gt *gt) -{ -#if IS_ENABLED(CONFIG_PM) - struct drm_i915_private *i915 = gt->i915; - struct intel_runtime_pm *rpm = &i915->runtime_pm; - struct i915_pmu *pmu = &i915->pmu; - intel_wakeref_t wakeref; - unsigned long flags; - u64 val; - - wakeref = intel_runtime_pm_get_if_in_use(rpm); - if (wakeref) { - val = __get_rc6(gt); - intel_runtime_pm_put(rpm, wakeref); - - /* - * If we are coming back from being runtime suspended we must - * be careful not to report a larger value than returned - * previously. - */ - - spin_lock_irqsave(&pmu->lock, flags); - - if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) { - pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0; - pmu->sample[__I915_SAMPLE_RC6].cur = val; - } else { - val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur; - } - - spin_unlock_irqrestore(&pmu->lock, flags); - } else { - struct device *kdev = rpm->kdev; - - /* - * We are runtime suspended. - * - * Report the delta from when the device was suspended to now, - * on top of the last known real value, as the approximated RC6 - * counter value. - */ - spin_lock_irqsave(&pmu->lock, flags); - - /* - * After the above branch intel_runtime_pm_get_if_in_use failed - * to get the runtime PM reference we cannot assume we are in - * runtime suspend since we can either: a) race with coming out - * of it before we took the power.lock, or b) there are other - * states than suspended which can bring us here. - * - * We need to double-check that we are indeed currently runtime - * suspended and if not we cannot do better than report the last - * known RC6 value. - */ - if (pm_runtime_status_suspended(kdev)) { - val = pm_runtime_suspended_time(kdev); - - if (!pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) - pmu->suspended_time_last = val; - - val -= pmu->suspended_time_last; - val += pmu->sample[__I915_SAMPLE_RC6].cur; - - pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val; - } else if (pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) { - val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur; - } else { - val = pmu->sample[__I915_SAMPLE_RC6].cur; - } - - spin_unlock_irqrestore(&pmu->lock, flags); - } - - return val; -#else - return __get_rc6(gt); -#endif -} - static u64 __i915_pmu_event_read(struct perf_event *event) { struct drm_i915_private *i915 = diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h index 4fc4f24783019..067dbbf3bdff0 100644 --- a/drivers/gpu/drm/i915/i915_pmu.h +++ b/drivers/gpu/drm/i915/i915_pmu.h @@ -97,9 +97,9 @@ struct i915_pmu { */ struct i915_pmu_sample sample[__I915_NUM_PMU_SAMPLERS]; /** - * @suspended_time_last: Cached suspend time from PM core. + * @sleep_last: Last time GT parked for RC6 estimation. */ - u64 suspended_time_last; + ktime_t sleep_last; /** * @i915_attr: Memory block holding device attributes. */ From ee73e2795b416b829f0e00e7c43154922dff495b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 12 Sep 2019 14:23:13 +0100 Subject: [PATCH 161/331] drm/i915/tgl: Disable preemption while being debugged We see failures where the context continues executing past a preemption event, eventually leading to situations where a request has executed before we have event submitted it to HW! It seems like tgl is ignoring our RING_TAIL updates, but more likely is that there is a missing update required for our semaphore waits around preemption. v2: And disable internal semaphore usage Signed-off-by: Chris Wilson Cc: Mika Kuoppala Acked-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190912132313.12751-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 3 +++ drivers/gpu/drm/i915/i915_pci.c | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 47d766ccea71e..a3f0e49997440 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -2939,6 +2939,9 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine) engine->flags |= I915_ENGINE_HAS_PREEMPTION; } + if (INTEL_GEN(engine->i915) >= 12) /* XXX disabled for debugging */ + engine->flags &= ~I915_ENGINE_HAS_SEMAPHORES; + if (engine->class != COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) >= 12) engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO; } diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 6981162764413..9236fccb3a832 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -798,6 +798,7 @@ static const struct intel_device_info intel_tigerlake_12_info = { .engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2), .has_rc6 = false, /* XXX disabled for debugging */ + .has_logical_ring_preemption = false, /* XXX disabled for debugging */ }; #undef GEN From 0c653722e6e669fae969c178f0d4a55d9c2dc922 Mon Sep 17 00:00:00 2001 From: Arkadiusz Hiler Date: Thu, 12 Sep 2019 15:54:18 +0300 Subject: [PATCH 162/331] drm/i915: Get the correct wakeref for reading HOTPLUG_EN et al. Without it we get: Unclaimed read from register 0x1e1110 WARNING: CPU: 2 PID: 1029 at drivers/gpu/drm/i915/intel_uncore.c:1101 __unclaimed_reg_debug+0x40/0x50 [i915] Call Trace: fwtable_read32+0x233/0x300 [i915] i915_interrupt_info+0xa73/0xd60 [i915] seq_read+0xdb/0x3c0 full_proxy_read+0x51/0x80 vfs_read+0x9e/0x160 ksys_read+0x8f/0xe0 do_syscall_64+0x55/0x1c0 entry_SYSCALL_64_after_hwframe+0x49/0xbe Cc: Chris Wilson Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109824 Signed-off-by: Arkadiusz Hiler Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190912125418.23115-2-arkadiusz.hiler@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index f3ae525b77c07..43db50095257c 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -534,6 +534,8 @@ static int i915_interrupt_info(struct seq_file *m, void *data) gen8_display_interrupt_info(m); } else if (IS_VALLEYVIEW(dev_priv)) { + intel_wakeref_t pref; + seq_printf(m, "Display IER:\t%08x\n", I915_READ(VLV_IER)); seq_printf(m, "Display IIR:\t%08x\n", @@ -544,7 +546,6 @@ static int i915_interrupt_info(struct seq_file *m, void *data) I915_READ(VLV_IMR)); for_each_pipe(dev_priv, pipe) { enum intel_display_power_domain power_domain; - intel_wakeref_t pref; power_domain = POWER_DOMAIN_PIPE(pipe); pref = intel_display_power_get_if_enabled(dev_priv, @@ -578,12 +579,14 @@ static int i915_interrupt_info(struct seq_file *m, void *data) seq_printf(m, "PM IMR:\t\t%08x\n", I915_READ(GEN6_PMIMR)); + pref = intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); seq_printf(m, "Port hotplug:\t%08x\n", I915_READ(PORT_HOTPLUG_EN)); seq_printf(m, "DPFLIPSTAT:\t%08x\n", I915_READ(VLV_DPFLIPSTAT)); seq_printf(m, "DPINVGTT:\t%08x\n", I915_READ(DPINVGTT)); + intel_display_power_put(dev_priv, POWER_DOMAIN_INIT, pref); } else if (!HAS_PCH_SPLIT(dev_priv)) { seq_printf(m, "Interrupt enable: %08x\n", From 0b8d6273db0635acd0dae983a39600b1b96fe77d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 12 Sep 2019 13:26:39 +0100 Subject: [PATCH 163/331] drm/i915/selftests: Keep the engine awake while we keep for preemption Keep the engine awake to ensure that we don't inject any pm-idle requests. References: https://bugs.freedesktop.org/show_bug.cgi?id=111108 Signed-off-by: Chris Wilson Cc: Mika Kuoppala Acked-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190912122639.25224-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/selftest_lrc.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index d791158988d63..26d05bd1bdc8b 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -897,6 +897,10 @@ static int live_suppress_self_preempt(void *arg) if (!intel_engine_has_preemption(engine)) continue; + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + goto err_wedged; + + intel_engine_pm_get(engine); engine->execlists.preempt_hang.count = 0; rq_a = spinner_create_request(&a.spin, @@ -904,12 +908,14 @@ static int live_suppress_self_preempt(void *arg) MI_NOOP); if (IS_ERR(rq_a)) { err = PTR_ERR(rq_a); + intel_engine_pm_put(engine); goto err_client_b; } i915_request_add(rq_a); if (!igt_wait_for_spinner(&a.spin, rq_a)) { pr_err("First client failed to start\n"); + intel_engine_pm_put(engine); goto err_wedged; } @@ -921,6 +927,7 @@ static int live_suppress_self_preempt(void *arg) MI_NOOP); if (IS_ERR(rq_b)) { err = PTR_ERR(rq_b); + intel_engine_pm_put(engine); goto err_client_b; } i915_request_add(rq_b); @@ -931,6 +938,7 @@ static int live_suppress_self_preempt(void *arg) if (!igt_wait_for_spinner(&b.spin, rq_b)) { pr_err("Second client failed to start\n"); + intel_engine_pm_put(engine); goto err_wedged; } @@ -944,10 +952,12 @@ static int live_suppress_self_preempt(void *arg) engine->name, engine->execlists.preempt_hang.count, depth); + intel_engine_pm_put(engine); err = -EINVAL; goto err_client_b; } + intel_engine_pm_put(engine); if (igt_flush_test(i915, I915_WAIT_LOCKED)) goto err_wedged; } From c8185520aed6faab58b310f573790badfca27981 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 13 Sep 2019 07:42:00 +0100 Subject: [PATCH 164/331] drm/i915/gtt: Make sure the gen6 ppgtt is bound before first use As we remove the struct_mutex protection from around the vma pinning, counters need to be atomic and aware that there may be multiple threads simultaneously active. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20190913064200.24297-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 40 ++++++++++++++++------------- drivers/gpu/drm/i915/i915_gem_gtt.h | 4 ++- 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index a09a9b62afbe5..e62e9d1a13075 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1790,6 +1790,8 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm) gen6_ppgtt_free_pd(ppgtt); free_scratch(vm); + + mutex_destroy(&ppgtt->pin_mutex); kfree(ppgtt->base.pd); } @@ -1895,7 +1897,7 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size) int gen6_ppgtt_pin(struct i915_ppgtt *base) { struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); - int err; + int err = 0; GEM_BUG_ON(ppgtt->base.vm.closed); @@ -1905,24 +1907,26 @@ int gen6_ppgtt_pin(struct i915_ppgtt *base) * (When vma->pin_count becomes atomic, I expect we will naturally * need a larger, unpacked, type and kill this redundancy.) */ - if (ppgtt->pin_count++) + if (atomic_add_unless(&ppgtt->pin_count, 1, 0)) return 0; + if (mutex_lock_interruptible(&ppgtt->pin_mutex)) + return -EINTR; + /* * PPGTT PDEs reside in the GGTT and consists of 512 entries. The * allocator works in address space sizes, so it's multiplied by page * size. We allocate at the top of the GTT to avoid fragmentation. */ - err = i915_vma_pin(ppgtt->vma, - 0, GEN6_PD_ALIGN, - PIN_GLOBAL | PIN_HIGH); - if (err) - goto unpin; - - return 0; + if (!atomic_read(&ppgtt->pin_count)) { + err = i915_vma_pin(ppgtt->vma, + 0, GEN6_PD_ALIGN, + PIN_GLOBAL | PIN_HIGH); + } + if (!err) + atomic_inc(&ppgtt->pin_count); + mutex_unlock(&ppgtt->pin_mutex); -unpin: - ppgtt->pin_count = 0; return err; } @@ -1930,22 +1934,20 @@ void gen6_ppgtt_unpin(struct i915_ppgtt *base) { struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); - GEM_BUG_ON(!ppgtt->pin_count); - if (--ppgtt->pin_count) - return; - - i915_vma_unpin(ppgtt->vma); + GEM_BUG_ON(!atomic_read(&ppgtt->pin_count)); + if (atomic_dec_and_test(&ppgtt->pin_count)) + i915_vma_unpin(ppgtt->vma); } void gen6_ppgtt_unpin_all(struct i915_ppgtt *base) { struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); - if (!ppgtt->pin_count) + if (!atomic_read(&ppgtt->pin_count)) return; - ppgtt->pin_count = 0; i915_vma_unpin(ppgtt->vma); + atomic_set(&ppgtt->pin_count, 0); } static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915) @@ -1958,6 +1960,8 @@ static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915) if (!ppgtt) return ERR_PTR(-ENOMEM); + mutex_init(&ppgtt->pin_mutex); + ppgtt_init(&ppgtt->base, &i915->gt); ppgtt->base.vm.top = 1; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 201788126a893..8fd2234ba0bf4 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -437,7 +437,9 @@ struct gen6_ppgtt { struct i915_vma *vma; gen6_pte_t __iomem *pd_addr; - unsigned int pin_count; + atomic_t pin_count; + struct mutex pin_mutex; + bool scan_for_unused_pt; }; From 12a97df00e2aa6fb2200a8935e78f3e00fab4eb2 Mon Sep 17 00:00:00 2001 From: Manasi Navare Date: Fri, 12 Jul 2019 13:22:13 -0700 Subject: [PATCH 165/331] drm/i915/display/icl: Bump up the hdisplay and vdisplay as per transcoder limits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On ICL+, the vertical limits for the transcoders are increased to 8192 and horizontal limits are bumped to 16K so bump up limits in intel_mode_valid() v4: * Increase the hdisplay to 16K (Ville) v3: * Supported starting ICL (Ville) * Use the higher limits from TRANS_VTOTAL register (Ville) v2: * Checkpatch warning (Manasi) Cc: Maarten Lankhorst Cc: Ville Syrjälä Signed-off-by: Manasi Navare Reviewed-by: Ville Syrjälä Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20190712202214.3906-1-manasi.d.navare@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index a19f8c73f2e0a..d77fd6ddac49e 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -15839,8 +15839,13 @@ intel_mode_valid(struct drm_device *dev, DRM_MODE_FLAG_CLKDIV2)) return MODE_BAD; - if (INTEL_GEN(dev_priv) >= 9 || - IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 11) { + hdisplay_max = 16384; + vdisplay_max = 8192; + htotal_max = 16384; + vtotal_max = 8192; + } else if (INTEL_GEN(dev_priv) >= 9 || + IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv)) { hdisplay_max = 8192; /* FDI max 4096 handled elsewhere */ vdisplay_max = 4096; htotal_max = 8192; From e91c8a29b452671d9af7259cd30980e6a2d78005 Mon Sep 17 00:00:00 2001 From: Manasi Navare Date: Fri, 12 Jul 2019 13:38:08 -0700 Subject: [PATCH 166/331] drm/i915/display/icl: Bump up the plane/fb height MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On ICL+, the max supported plane height is 4320, so bump it up To support 4320, we need to increase the number of bits used to read plane_height to 13 as opposed to older 12 bits. v4: * Adjust the width mask also since extra bits are mbz (Ville) v3: * Use 0xffff for mask as extra bits are mbz (Ville) v2: * ICL plane height supported is 4320 (Ville) * Add a new line between max width and max height (Jose) Cc: Maarten Lankhorst Cc: Ville Syrjälä Signed-off-by: Manasi Navare Reviewed-by: Ville Syrjälä Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20190712203808.4126-1-manasi.d.navare@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 21 +++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index d77fd6ddac49e..4e001113e8285 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -3333,6 +3333,16 @@ static int icl_max_plane_width(const struct drm_framebuffer *fb, return 5120; } +static int skl_max_plane_height(void) +{ + return 4096; +} + +static int icl_max_plane_height(void) +{ + return 4320; +} + static bool skl_check_main_ccs_coordinates(struct intel_plane_state *plane_state, int main_x, int main_y, u32 main_offset) { @@ -3381,7 +3391,7 @@ static int skl_check_main_surface(struct intel_plane_state *plane_state) int w = drm_rect_width(&plane_state->base.src) >> 16; int h = drm_rect_height(&plane_state->base.src) >> 16; int max_width; - int max_height = 4096; + int max_height; u32 alignment, offset, aux_offset = plane_state->color_plane[1].offset; if (INTEL_GEN(dev_priv) >= 11) @@ -3391,6 +3401,11 @@ static int skl_check_main_surface(struct intel_plane_state *plane_state) else max_width = skl_max_plane_width(fb, 0, rotation); + if (INTEL_GEN(dev_priv) >= 11) + max_height = icl_max_plane_height(); + else + max_height = skl_max_plane_height(); + if (w > max_width || h > max_height) { DRM_DEBUG_KMS("requested Y/RGB source size %dx%d too big (limit %dx%d)\n", w, h, max_width, max_height); @@ -9873,8 +9888,8 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc, offset = I915_READ(PLANE_OFFSET(pipe, plane_id)); val = I915_READ(PLANE_SIZE(pipe, plane_id)); - fb->height = ((val >> 16) & 0xfff) + 1; - fb->width = ((val >> 0) & 0x1fff) + 1; + fb->height = ((val >> 16) & 0xffff) + 1; + fb->width = ((val >> 0) & 0xffff) + 1; val = I915_READ(PLANE_STRIDE(pipe, plane_id)); stride_mult = skl_plane_stride_mult(fb, 0, DRM_MODE_ROTATE_0); From eebab60f224fcfd560957715d08c31564d8672ed Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 12 Sep 2019 17:08:34 +0100 Subject: [PATCH 167/331] drm/i915: Don't mix srcu tag and negative error codes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While srcu may use an integer tag, it does not exclude potential error codes and so may overlap with our own use of -EINTR. Use a separate outparam to store the tag, and report the error code separately. Fixes: 2caffbf11762 ("drm/i915: Revoke mmaps and prevent access to fence registers across reset") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Ville Syrjälä Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190912160834.30601-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 6 ++---- drivers/gpu/drm/i915/gt/intel_reset.c | 8 +++----- drivers/gpu/drm/i915/gt/intel_reset.h | 2 +- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 82db2b783123a..1748e63156a24 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -245,11 +245,9 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) wakeref = intel_runtime_pm_get(rpm); - srcu = intel_gt_reset_trylock(ggtt->vm.gt); - if (srcu < 0) { - ret = srcu; + ret = intel_gt_reset_trylock(ggtt->vm.gt, &srcu); + if (ret) goto err_rpm; - } ret = i915_mutex_lock_interruptible(dev); if (ret) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 296bbc7745fb0..8327220ac5585 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -1214,10 +1214,8 @@ void intel_gt_handle_error(struct intel_gt *gt, intel_runtime_pm_put(>->i915->runtime_pm, wakeref); } -int intel_gt_reset_trylock(struct intel_gt *gt) +int intel_gt_reset_trylock(struct intel_gt *gt, int *srcu) { - int srcu; - might_lock(>->reset.backoff_srcu); might_sleep(); @@ -1232,10 +1230,10 @@ int intel_gt_reset_trylock(struct intel_gt *gt) rcu_read_lock(); } - srcu = srcu_read_lock(>->reset.backoff_srcu); + *srcu = srcu_read_lock(>->reset.backoff_srcu); rcu_read_unlock(); - return srcu; + return 0; } void intel_gt_reset_unlock(struct intel_gt *gt, int tag) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h index 37a987b171082..52c00199e0694 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.h +++ b/drivers/gpu/drm/i915/gt/intel_reset.h @@ -38,7 +38,7 @@ int intel_engine_reset(struct intel_engine_cs *engine, void __i915_request_reset(struct i915_request *rq, bool guilty); -int __must_check intel_gt_reset_trylock(struct intel_gt *gt); +int __must_check intel_gt_reset_trylock(struct intel_gt *gt, int *srcu); void intel_gt_reset_unlock(struct intel_gt *gt, int tag); void intel_gt_set_wedged(struct intel_gt *gt); From 6da301e5291717861fb25025b782922d3a36c7af Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 13 Sep 2019 15:55:56 +0100 Subject: [PATCH 168/331] drm/i915/tgl: Limit ourselves to just rcs0 More pruning away of features until we have a stable system and a basis for debugging what's missing. v2: Fixup vdbox/vebox fusing Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190913145556.23912-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_pci.c | 1 + drivers/gpu/drm/i915/intel_device_info.c | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 9236fccb3a832..ee9a7959204cc 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -799,6 +799,7 @@ static const struct intel_device_info intel_tigerlake_12_info = { BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2), .has_rc6 = false, /* XXX disabled for debugging */ .has_logical_ring_preemption = false, /* XXX disabled for debugging */ + .engine_mask = BIT(RCS0), /* XXX reduced for debugging */ }; #undef GEN diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 50b05a5de53b0..727089dcd280a 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -1004,8 +1004,10 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv) GEN11_GT_VEBOX_DISABLE_SHIFT; for (i = 0; i < I915_MAX_VCS; i++) { - if (!HAS_ENGINE(dev_priv, _VCS(i))) + if (!HAS_ENGINE(dev_priv, _VCS(i))) { + vdbox_mask &= ~BIT(i); continue; + } if (!(BIT(i) & vdbox_mask)) { info->engine_mask &= ~BIT(_VCS(i)); @@ -1026,8 +1028,10 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv) GEM_BUG_ON(vdbox_mask != VDBOX_MASK(dev_priv)); for (i = 0; i < I915_MAX_VECS; i++) { - if (!HAS_ENGINE(dev_priv, _VECS(i))) + if (!HAS_ENGINE(dev_priv, _VECS(i))) { + vebox_mask &= ~BIT(i); continue; + } if (!(BIT(i) & vebox_mask)) { info->engine_mask &= ~BIT(_VECS(i)); From c9f8d18710bee1c5b87d54007c24d8bf79029fb6 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Fri, 13 Sep 2019 17:16:50 +0300 Subject: [PATCH 169/331] drm/i915: Update Gen11 forcewake ranges Daniele noticed new render ranges in Gen11 fw table. Bspec: 18331 Cc: Daniele Ceraolo Spurio Signed-off-by: Mika Kuoppala Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190913141652.27958-1-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/i915/intel_uncore.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 9e583f13a9e48..732082a720222 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -805,9 +805,6 @@ void assert_forcewakes_active(struct intel_uncore *uncore, /* We give fast paths for the really cool registers */ #define NEEDS_FORCE_WAKE(reg) ((reg) < 0x40000) -#define GEN11_NEEDS_FORCE_WAKE(reg) \ - ((reg) < 0x40000 || ((reg) >= 0x1c0000 && (reg) < 0x1dc000)) - #define __gen6_reg_read_fw_domains(uncore, offset) \ ({ \ enum forcewake_domains __fwd; \ @@ -903,12 +900,7 @@ static const struct intel_forcewake_range __vlv_fw_ranges[] = { }) #define __gen11_fwtable_reg_read_fw_domains(uncore, offset) \ -({ \ - enum forcewake_domains __fwd = 0; \ - if (GEN11_NEEDS_FORCE_WAKE((offset))) \ - __fwd = find_fw_domain(uncore, offset); \ - __fwd; \ -}) + find_fw_domain(uncore, offset) /* *Must* be sorted by offset! See intel_shadow_table_check(). */ static const i915_reg_t gen8_shadowed_regs[] = { @@ -1005,8 +997,9 @@ static const struct intel_forcewake_range __chv_fw_ranges[] = { #define __gen11_fwtable_reg_write_fw_domains(uncore, offset) \ ({ \ enum forcewake_domains __fwd = 0; \ - if (GEN11_NEEDS_FORCE_WAKE((offset)) && !is_gen11_shadowed(offset)) \ - __fwd = find_fw_domain(uncore, offset); \ + const u32 __offset = (offset); \ + if (!is_gen11_shadowed(__offset)) \ + __fwd = find_fw_domain(uncore, __offset); \ __fwd; \ }) @@ -1065,9 +1058,11 @@ static const struct intel_forcewake_range __gen11_fw_ranges[] = { GEN_FW_RANGE(0x9400, 0x97ff, FORCEWAKE_ALL), GEN_FW_RANGE(0x9800, 0xafff, FORCEWAKE_BLITTER), GEN_FW_RANGE(0xb000, 0xb47f, FORCEWAKE_RENDER), - GEN_FW_RANGE(0xb480, 0xdfff, FORCEWAKE_BLITTER), - GEN_FW_RANGE(0xe000, 0xe8ff, FORCEWAKE_RENDER), - GEN_FW_RANGE(0xe900, 0x243ff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0xb480, 0xdeff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0xdf00, 0xe8ff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0xe900, 0x16dff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x16e00, 0x19fff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x1a000, 0x243ff, FORCEWAKE_BLITTER), GEN_FW_RANGE(0x24400, 0x247ff, FORCEWAKE_RENDER), GEN_FW_RANGE(0x24800, 0x3ffff, FORCEWAKE_BLITTER), GEN_FW_RANGE(0x40000, 0x1bffff, 0), From cf82d9ddd3b5203dea5561dfabd1216b8159af37 Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Fri, 13 Sep 2019 17:16:51 +0300 Subject: [PATCH 170/331] drm/i915/tgl: Introduce gen12 forcewake ranges The media ranges extend beyond what gen11 gives so we can't piggypack on gen11 ranges, even on read side. Introduce a table for gen12 and accessors for it. v2: correctly implement gen12_fwtable_write/read (Daniele) v3: update with ranges from bspec. v4: avoid GEN11_NEEDS_FORCEWAKE (Mika) v5: bspec ref (Daniele) BSpec: 52078 Cc: Tvrtko Ursulin Cc: Daniele Ceraolo Spurio Signed-off-by: Michel Thierry Signed-off-by: Mika Kuoppala Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190913141652.27958-2-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/i915/intel_uncore.c | 75 ++++++++++++++++++- drivers/gpu/drm/i915/selftests/intel_uncore.c | 2 + 2 files changed, 76 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 732082a720222..94a97bf8c021c 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -902,6 +902,9 @@ static const struct intel_forcewake_range __vlv_fw_ranges[] = { #define __gen11_fwtable_reg_read_fw_domains(uncore, offset) \ find_fw_domain(uncore, offset) +#define __gen12_fwtable_reg_read_fw_domains(uncore, offset) \ + find_fw_domain(uncore, offset) + /* *Must* be sorted by offset! See intel_shadow_table_check(). */ static const i915_reg_t gen8_shadowed_regs[] = { RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ @@ -927,6 +930,20 @@ static const i915_reg_t gen11_shadowed_regs[] = { /* TODO: Other registers are not yet used */ }; +static const i915_reg_t gen12_shadowed_regs[] = { + RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ + GEN6_RPNSWREQ, /* 0xA008 */ + GEN6_RC_VIDEO_FREQ, /* 0xA00C */ + RING_TAIL(BLT_RING_BASE), /* 0x22000 (base) */ + RING_TAIL(GEN11_BSD_RING_BASE), /* 0x1C0000 (base) */ + RING_TAIL(GEN11_BSD2_RING_BASE), /* 0x1C4000 (base) */ + RING_TAIL(GEN11_VEBOX_RING_BASE), /* 0x1C8000 (base) */ + RING_TAIL(GEN11_BSD3_RING_BASE), /* 0x1D0000 (base) */ + RING_TAIL(GEN11_BSD4_RING_BASE), /* 0x1D4000 (base) */ + RING_TAIL(GEN11_VEBOX2_RING_BASE), /* 0x1D8000 (base) */ + /* TODO: Other registers are not yet used */ +}; + static int mmio_reg_cmp(u32 key, const i915_reg_t *reg) { u32 offset = i915_mmio_reg_offset(*reg); @@ -949,6 +966,7 @@ static bool is_gen##x##_shadowed(u32 offset) \ __is_genX_shadowed(8) __is_genX_shadowed(11) +__is_genX_shadowed(12) static enum forcewake_domains gen6_reg_write_fw_domains(struct intel_uncore *uncore, i915_reg_t reg) @@ -1003,6 +1021,15 @@ static const struct intel_forcewake_range __chv_fw_ranges[] = { __fwd; \ }) +#define __gen12_fwtable_reg_write_fw_domains(uncore, offset) \ +({ \ + enum forcewake_domains __fwd = 0; \ + const u32 __offset = (offset); \ + if (!is_gen12_shadowed(__offset)) \ + __fwd = find_fw_domain(uncore, __offset); \ + __fwd; \ +}) + /* *Must* be sorted by offset ranges! See intel_fw_table_check(). */ static const struct intel_forcewake_range __gen9_fw_ranges[] = { GEN_FW_RANGE(0x0, 0xaff, FORCEWAKE_BLITTER), @@ -1075,6 +1102,46 @@ static const struct intel_forcewake_range __gen11_fw_ranges[] = { GEN_FW_RANGE(0x1d8000, 0x1dbfff, FORCEWAKE_MEDIA_VEBOX1) }; +/* *Must* be sorted by offset ranges! See intel_fw_table_check(). */ +static const struct intel_forcewake_range __gen12_fw_ranges[] = { + GEN_FW_RANGE(0x0, 0xaff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0xb00, 0x1fff, 0), /* uncore range */ + GEN_FW_RANGE(0x2000, 0x26ff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x2700, 0x2fff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x3000, 0x3fff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x4000, 0x51ff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x5200, 0x7fff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x8000, 0x813f, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x8140, 0x815f, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x8160, 0x82ff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x8300, 0x84ff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x8500, 0x8bff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x8c00, 0x8cff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x8d00, 0x93ff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x9400, 0x97ff, FORCEWAKE_ALL), + GEN_FW_RANGE(0x9800, 0xafff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0xb000, 0xb47f, FORCEWAKE_RENDER), + GEN_FW_RANGE(0xb480, 0xdfff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0xe000, 0xe8ff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0xe900, 0x147ff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x14800, 0x148ff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x14900, 0x19fff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x1a000, 0x1a7ff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x1a800, 0x1afff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x1b000, 0x1bfff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x1c000, 0x243ff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x24400, 0x247ff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x24800, 0x3ffff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x40000, 0x1bffff, 0), + GEN_FW_RANGE(0x1c0000, 0x1c3fff, FORCEWAKE_MEDIA_VDBOX0), + GEN_FW_RANGE(0x1c4000, 0x1c7fff, FORCEWAKE_MEDIA_VDBOX1), + GEN_FW_RANGE(0x1c8000, 0x1cbfff, FORCEWAKE_MEDIA_VEBOX0), + GEN_FW_RANGE(0x1cc000, 0x1cffff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x1d0000, 0x1d3fff, FORCEWAKE_MEDIA_VDBOX2), + GEN_FW_RANGE(0x1d4000, 0x1d7fff, FORCEWAKE_MEDIA_VDBOX3), + GEN_FW_RANGE(0x1d8000, 0x1dbfff, FORCEWAKE_MEDIA_VEBOX1) +}; + static void ilk_dummy_write(struct intel_uncore *uncore) { @@ -1223,6 +1290,7 @@ __gen_read(func, 16) \ __gen_read(func, 32) \ __gen_read(func, 64) +__gen_reg_read_funcs(gen12_fwtable); __gen_reg_read_funcs(gen11_fwtable); __gen_reg_read_funcs(fwtable); __gen_reg_read_funcs(gen6); @@ -1314,6 +1382,7 @@ __gen_write(func, 8) \ __gen_write(func, 16) \ __gen_write(func, 32) +__gen_reg_write_funcs(gen12_fwtable); __gen_reg_write_funcs(gen11_fwtable); __gen_reg_write_funcs(fwtable); __gen_reg_write_funcs(gen8); @@ -1685,10 +1754,14 @@ static int uncore_forcewake_init(struct intel_uncore *uncore) ASSIGN_FW_DOMAINS_TABLE(uncore, __gen9_fw_ranges); ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); - } else { + } else if (IS_GEN(i915, 11)) { ASSIGN_FW_DOMAINS_TABLE(uncore, __gen11_fw_ranges); ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen11_fwtable); ASSIGN_READ_MMIO_VFUNCS(uncore, gen11_fwtable); + } else { + ASSIGN_FW_DOMAINS_TABLE(uncore, __gen12_fw_ranges); + ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen12_fwtable); + ASSIGN_READ_MMIO_VFUNCS(uncore, gen12_fwtable); } uncore->pmic_bus_access_nb.notifier_call = i915_pmic_bus_access_notifier; diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c index 86815c6072a17..0ffb141eb988e 100644 --- a/drivers/gpu/drm/i915/selftests/intel_uncore.c +++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c @@ -67,6 +67,7 @@ static int intel_shadow_table_check(void) } reg_lists[] = { { gen8_shadowed_regs, ARRAY_SIZE(gen8_shadowed_regs) }, { gen11_shadowed_regs, ARRAY_SIZE(gen11_shadowed_regs) }, + { gen12_shadowed_regs, ARRAY_SIZE(gen12_shadowed_regs) }, }; const i915_reg_t *reg; unsigned int i, j; @@ -101,6 +102,7 @@ int intel_uncore_mock_selftests(void) { __chv_fw_ranges, ARRAY_SIZE(__chv_fw_ranges), false }, { __gen9_fw_ranges, ARRAY_SIZE(__gen9_fw_ranges), true }, { __gen11_fw_ranges, ARRAY_SIZE(__gen11_fw_ranges), true }, + { __gen12_fw_ranges, ARRAY_SIZE(__gen12_fw_ranges), true }, }; int err, i; From 2f7155629c9dd4d4ed0c2ec95a8982c84882b9e2 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Fri, 13 Sep 2019 21:06:38 +0100 Subject: [PATCH 171/331] drm/i915/tgl: Re-enable rc6 We think that we got rc6 problems sorted out. Flip the switch and let CI expose our tendency to naive optimism. Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190913200638.31939-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_pci.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index ee9a7959204cc..e4a26bbd87882 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -797,7 +797,6 @@ static const struct intel_device_info intel_tigerlake_12_info = { .display.has_modular_fia = 1, .engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2), - .has_rc6 = false, /* XXX disabled for debugging */ .has_logical_ring_preemption = false, /* XXX disabled for debugging */ .engine_mask = BIT(RCS0), /* XXX reduced for debugging */ }; From a2b69ea4d26d1077b00924ff34e46b28d997aae2 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 13 Sep 2019 13:04:07 +0300 Subject: [PATCH 172/331] drm/i915: introduce INTEL_DISPLAY_ENABLED() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prepare for making a distinction between not having display and having disabled display. Add INTEL_DISPLAY_ENABLED() and use it where HAS_DISPLAY() is used after intel_device_info_runtime_init(). This is initially duplication, as disabling display still leads to ->pipe_mask = 0 and HAS_DISPLAY() being false. Note that ever since i915.display_disable was introduced, it has not affected PCH detection even if it uses HAS_DISPLAY(), as display disable happens after that. Since INTEL_DISPLAY_ENABLED() will not make sense unless HAS_DISPLAY() is true, include a warning for catching misuses making decisions on INTEL_DISPLAY_ENABLED() when HAS_DISPLAY() is false. v2: Remove INTEL_DISPLAY_ENABLED() check from intel_detect_pch() (Chris) Cc: Chris Wilson Cc: José Roberto de Souza Cc: Ville Syrjälä Reviewed-by: José Roberto de Souza Acked-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190913100407.30991-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/display/intel_bios.c | 2 +- drivers/gpu/drm/i915/display/intel_display.c | 4 ++-- drivers/gpu/drm/i915/display/intel_fbdev.c | 2 +- drivers/gpu/drm/i915/display/intel_gmbus.c | 2 +- drivers/gpu/drm/i915/i915_drv.c | 8 ++++---- drivers/gpu/drm/i915/i915_drv.h | 3 +++ 6 files changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index efb39f350b196..1def550c68c8b 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -1833,7 +1833,7 @@ void intel_bios_init(struct drm_i915_private *dev_priv) const struct bdb_header *bdb; u8 __iomem *bios = NULL; - if (!HAS_DISPLAY(dev_priv)) { + if (!HAS_DISPLAY(dev_priv) || !INTEL_DISPLAY_ENABLED(dev_priv)) { DRM_DEBUG_KMS("Skipping VBT init due to disabled display.\n"); return; } diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 4e001113e8285..e75945a53e06c 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -15381,7 +15381,7 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) intel_pps_init(dev_priv); - if (!HAS_DISPLAY(dev_priv)) + if (!HAS_DISPLAY(dev_priv) || !INTEL_DISPLAY_ENABLED(dev_priv)) return; if (INTEL_GEN(dev_priv) >= 12) { @@ -17293,7 +17293,7 @@ intel_display_capture_error_state(struct drm_i915_private *dev_priv) BUILD_BUG_ON(ARRAY_SIZE(transcoders) != ARRAY_SIZE(error->transcoder)); - if (!HAS_DISPLAY(dev_priv)) + if (!HAS_DISPLAY(dev_priv) || !INTEL_DISPLAY_ENABLED(dev_priv)) return NULL; error = kzalloc(sizeof(*error), GFP_ATOMIC); diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index d59eee5c5d9c2..68338669f0542 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -444,7 +444,7 @@ int intel_fbdev_init(struct drm_device *dev) struct intel_fbdev *ifbdev; int ret; - if (WARN_ON(!HAS_DISPLAY(dev_priv))) + if (WARN_ON(!HAS_DISPLAY(dev_priv) || !INTEL_DISPLAY_ENABLED(dev_priv))) return -ENODEV; ifbdev = kzalloc(sizeof(struct intel_fbdev), GFP_KERNEL); diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.c b/drivers/gpu/drm/i915/display/intel_gmbus.c index d6775a0057265..3d4d19ac1d145 100644 --- a/drivers/gpu/drm/i915/display/intel_gmbus.c +++ b/drivers/gpu/drm/i915/display/intel_gmbus.c @@ -836,7 +836,7 @@ int intel_gmbus_setup(struct drm_i915_private *dev_priv) unsigned int pin; int ret; - if (!HAS_DISPLAY(dev_priv)) + if (!HAS_DISPLAY(dev_priv) || !INTEL_DISPLAY_ENABLED(dev_priv)) return 0; if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index c52ef2223580f..c95986a984c81 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -338,7 +338,7 @@ static int i915_driver_modeset_probe(struct drm_device *dev) if (i915_inject_probe_failure(dev_priv)) return -ENODEV; - if (HAS_DISPLAY(dev_priv)) { + if (HAS_DISPLAY(dev_priv) && INTEL_DISPLAY_ENABLED(dev_priv)) { ret = drm_vblank_init(&dev_priv->drm, INTEL_NUM_PIPES(dev_priv)); if (ret) @@ -389,7 +389,7 @@ static int i915_driver_modeset_probe(struct drm_device *dev) intel_overlay_setup(dev_priv); - if (!HAS_DISPLAY(dev_priv)) + if (!HAS_DISPLAY(dev_priv) || !INTEL_DISPLAY_ENABLED(dev_priv)) return 0; ret = intel_fbdev_init(dev); @@ -1415,7 +1415,7 @@ static void i915_driver_register(struct drm_i915_private *dev_priv) } else DRM_ERROR("Failed to register driver for userspace access!\n"); - if (HAS_DISPLAY(dev_priv)) { + if (HAS_DISPLAY(dev_priv) && INTEL_DISPLAY_ENABLED(dev_priv)) { /* Must be done after probing outputs */ intel_opregion_register(dev_priv); acpi_video_register(); @@ -1438,7 +1438,7 @@ static void i915_driver_register(struct drm_i915_private *dev_priv) * We need to coordinate the hotplugs with the asynchronous fbdev * configuration, for which we use the fbdev->async_cookie. */ - if (HAS_DISPLAY(dev_priv)) + if (HAS_DISPLAY(dev_priv) && INTEL_DISPLAY_ENABLED(dev_priv)) drm_kms_helper_poll_init(dev); intel_power_domains_enable(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index bf600888b3f10..4faec2f94e19e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2192,6 +2192,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_DISPLAY(dev_priv) (INTEL_INFO(dev_priv)->pipe_mask != 0) +/* Only valid when HAS_DISPLAY() is true */ +#define INTEL_DISPLAY_ENABLED(dev_priv) (WARN_ON(!HAS_DISPLAY(dev_priv)), !i915_modparams.disable_display) + static inline bool intel_vtd_active(void) { #ifdef CONFIG_INTEL_IOMMU From d1d23d7f4be6a619af4ab729a12856e9cbded985 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 13 Sep 2019 22:31:54 +0300 Subject: [PATCH 173/331] drm/i915: Replace is_planar_yuv_format() with drm_format_info_is_yuv_semiplanar() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's a helper in drm_fourcc.h these days to check of we're dealing with a two plane YUV format. Make use if it. Also s/plane/color_plane/ in skl_plane_relative_data_rate() to reduce the confusion. Reviewed-by: Maarten Lankhorst Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190913193157.9556-2-ville.syrjala@linux.intel.com --- .../gpu/drm/i915/display/intel_atomic_plane.c | 5 ++-- drivers/gpu/drm/i915/display/intel_display.c | 10 +++---- drivers/gpu/drm/i915/display/intel_display.h | 3 ++- drivers/gpu/drm/i915/display/intel_sprite.c | 20 +++----------- drivers/gpu/drm/i915/display/intel_sprite.h | 1 - drivers/gpu/drm/i915/intel_pm.c | 27 +++++++++---------- 6 files changed, 27 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.c b/drivers/gpu/drm/i915/display/intel_atomic_plane.c index d1fcdf206da41..476ef0906ba04 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.c @@ -144,6 +144,7 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_ struct intel_plane_state *new_plane_state) { struct intel_plane *plane = to_intel_plane(new_plane_state->base.plane); + const struct drm_framebuffer *fb = new_plane_state->base.fb; int ret; new_crtc_state->active_planes &= ~BIT(plane->id); @@ -164,11 +165,11 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_ new_crtc_state->active_planes |= BIT(plane->id); if (new_plane_state->base.visible && - is_planar_yuv_format(new_plane_state->base.fb->format->format)) + drm_format_info_is_yuv_semiplanar(fb->format)) new_crtc_state->nv12_planes |= BIT(plane->id); if (new_plane_state->base.visible && - new_plane_state->base.fb->format->format == DRM_FORMAT_C8) + fb->format->format == DRM_FORMAT_C8) new_crtc_state->c8_planes |= BIT(plane->id); if (new_plane_state->base.visible || old_plane_state->base.visible) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index e75945a53e06c..276adc3035641 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -3545,7 +3545,7 @@ int skl_check_plane_surface(struct intel_plane_state *plane_state) * Handle the AUX surface first since * the main surface setup depends on it. */ - if (is_planar_yuv_format(fb->format->format)) { + if (drm_format_info_is_yuv_semiplanar(fb->format)) { ret = skl_check_nv12_aux_surface(plane_state); if (ret) return ret; @@ -5463,7 +5463,7 @@ skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach, return 0; } - if (format && is_planar_yuv_format(format->format) && + if (format && drm_format_info_is_yuv_semiplanar(format) && (src_h < SKL_MIN_YUV_420_SRC_H || src_w < SKL_MIN_YUV_420_SRC_W)) { DRM_DEBUG_KMS("Planar YUV: src dimensions not met\n"); return -EINVAL; @@ -5540,7 +5540,7 @@ static int skl_update_scaler_plane(struct intel_crtc_state *crtc_state, /* Pre-gen11 and SDR planes always need a scaler for planar formats. */ if (!icl_is_hdr_plane(dev_priv, intel_plane->id) && - fb && is_planar_yuv_format(fb->format->format)) + fb && drm_format_info_is_yuv_semiplanar(fb->format)) need_scaler = true; ret = skl_update_scaler(crtc_state, force_detach, @@ -14552,7 +14552,7 @@ intel_cleanup_plane_fb(struct drm_plane *plane, int skl_max_scale(const struct intel_crtc_state *crtc_state, - u32 pixel_format) + const struct drm_format_info *format) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); @@ -14577,7 +14577,7 @@ skl_max_scale(const struct intel_crtc_state *crtc_state, * or * cdclk/crtc_clock */ - mult = is_planar_yuv_format(pixel_format) ? 2 : 3; + mult = drm_format_info_is_yuv_semiplanar(format) ? 2 : 3; tmpclk1 = (1 << 16) * mult - 1; tmpclk2 = (1 << 8) * ((max_dotclk << 8) / crtc_clock); max_scale = min(tmpclk1, tmpclk2); diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index f4ddde1716553..66330fcb10d48 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -34,6 +34,7 @@ struct drm_connector; struct drm_device; struct drm_encoder; struct drm_file; +struct drm_format_info; struct drm_framebuffer; struct drm_i915_error_state_buf; struct drm_i915_gem_object; @@ -548,7 +549,7 @@ void intel_crtc_arm_fifo_underrun(struct intel_crtc *crtc, u16 skl_scaler_calc_phase(int sub, int scale, bool chroma_center); int skl_update_scaler_crtc(struct intel_crtc_state *crtc_state); int skl_max_scale(const struct intel_crtc_state *crtc_state, - u32 pixel_format); + const struct drm_format_info *format); u32 glk_plane_color_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state); u32 glk_plane_color_ctl_crtc(const struct intel_crtc_state *crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index e415b0ad4a425..7a7078d0ba23d 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -48,19 +48,6 @@ #include "intel_psr.h" #include "intel_sprite.h" -bool is_planar_yuv_format(u32 pixelformat) -{ - switch (pixelformat) { - case DRM_FORMAT_NV12: - case DRM_FORMAT_P010: - case DRM_FORMAT_P012: - case DRM_FORMAT_P016: - return true; - default: - return false; - } -} - int intel_usecs_to_scanlines(const struct drm_display_mode *adjusted_mode, int usecs) { @@ -361,6 +348,7 @@ skl_program_scaler(struct intel_plane *plane, const struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + const struct drm_framebuffer *fb = plane_state->base.fb; enum pipe pipe = plane->pipe; int scaler_id = plane_state->scaler_id; const struct intel_scaler *scaler = @@ -381,7 +369,7 @@ skl_program_scaler(struct intel_plane *plane, 0, INT_MAX); /* TODO: handle sub-pixel coordinates */ - if (is_planar_yuv_format(plane_state->base.fb->format->format) && + if (drm_format_info_is_yuv_semiplanar(fb->format) && !icl_is_hdr_plane(dev_priv, plane->id)) { y_hphase = skl_scaler_calc_phase(1, hscale, false); y_vphase = skl_scaler_calc_phase(1, vscale, false); @@ -1790,7 +1778,7 @@ static int skl_plane_check_nv12_rotation(const struct intel_plane_state *plane_s int src_w = drm_rect_width(&plane_state->base.src) >> 16; /* Display WA #1106 */ - if (is_planar_yuv_format(fb->format->format) && src_w & 3 && + if (drm_format_info_is_yuv_semiplanar(fb->format) && src_w & 3 && (rotation == DRM_MODE_ROTATE_270 || rotation == (DRM_MODE_REFLECT_X | DRM_MODE_ROTATE_90))) { DRM_DEBUG_KMS("src width must be multiple of 4 for rotated planar YUV\n"); @@ -1817,7 +1805,7 @@ static int skl_plane_check(struct intel_crtc_state *crtc_state, /* use scaler when colorkey is not required */ if (!plane_state->ckey.flags && intel_fb_scalable(fb)) { min_scale = 1; - max_scale = skl_max_scale(crtc_state, fb->format->format); + max_scale = skl_max_scale(crtc_state, fb->format); } ret = drm_atomic_helper_check_plane_state(&plane_state->base, diff --git a/drivers/gpu/drm/i915/display/intel_sprite.h b/drivers/gpu/drm/i915/display/intel_sprite.h index 093a2d156f1ed..229336214f685 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.h +++ b/drivers/gpu/drm/i915/display/intel_sprite.h @@ -17,7 +17,6 @@ struct drm_i915_private; struct intel_crtc_state; struct intel_plane_state; -bool is_planar_yuv_format(u32 pixelformat); int intel_usecs_to_scanlines(const struct drm_display_mode *adjusted_mode, int usecs); struct intel_plane *intel_sprite_plane_create(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index d0ceb272551f6..6aa40f5462266 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4009,7 +4009,8 @@ skl_ddb_get_hw_plane_state(struct drm_i915_private *dev_priv, val = I915_READ(PLANE_BUF_CFG(pipe, plane_id)); val2 = I915_READ(PLANE_NV12_BUF_CFG(pipe, plane_id)); - if (is_planar_yuv_format(fourcc)) + if (fourcc && + drm_format_info_is_yuv_semiplanar(drm_format_info(fourcc))) swap(val, val2); skl_ddb_entry_init_from_hw(dev_priv, ddb_y, val); @@ -4197,25 +4198,23 @@ int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc, static u64 skl_plane_relative_data_rate(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state, - const int plane) + int color_plane) { - struct intel_plane *intel_plane = to_intel_plane(plane_state->base.plane); + struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + const struct drm_framebuffer *fb = plane_state->base.fb; u32 data_rate; u32 width = 0, height = 0; - struct drm_framebuffer *fb; - u32 format; uint_fixed_16_16_t down_scale_amount; u64 rate; if (!plane_state->base.visible) return 0; - fb = plane_state->base.fb; - format = fb->format->format; - - if (intel_plane->id == PLANE_CURSOR) + if (plane->id == PLANE_CURSOR) return 0; - if (plane == 1 && !is_planar_yuv_format(format)) + + if (color_plane == 1 && + !drm_format_info_is_yuv_semiplanar(fb->format)) return 0; /* @@ -4227,7 +4226,7 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *crtc_state, height = drm_rect_height(&plane_state->base.src) >> 16; /* UV plane does 1/2 pixel sub-sampling */ - if (plane == 1 && is_planar_yuv_format(format)) { + if (color_plane == 1) { width /= 2; height /= 2; } @@ -4238,7 +4237,7 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *crtc_state, rate = mul_round_up_u32_fixed16(data_rate, down_scale_amount); - rate *= fb->format->cpp[plane]; + rate *= fb->format->cpp[color_plane]; return rate; } @@ -4643,7 +4642,7 @@ skl_compute_wm_params(const struct intel_crtc_state *crtc_state, u32 interm_pbpl; /* only planar format has two planes */ - if (color_plane == 1 && !is_planar_yuv_format(format->format)) { + if (color_plane == 1 && !drm_format_info_is_yuv_semiplanar(format)) { DRM_DEBUG_KMS("Non planar format have single plane\n"); return -EINVAL; } @@ -4655,7 +4654,7 @@ skl_compute_wm_params(const struct intel_crtc_state *crtc_state, wp->x_tiled = modifier == I915_FORMAT_MOD_X_TILED; wp->rc_surface = modifier == I915_FORMAT_MOD_Y_TILED_CCS || modifier == I915_FORMAT_MOD_Yf_TILED_CCS; - wp->is_planar = is_planar_yuv_format(format->format); + wp->is_planar = drm_format_info_is_yuv_semiplanar(format); wp->width = width; if (color_plane == 1 && wp->is_planar) From 76c36a4391641aa37ce7adede32d511a07450d5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 13 Sep 2019 22:31:55 +0300 Subject: [PATCH 174/331] drm/i915: Allow downscale factor of <3.0 on glk+ for all formats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bspec says that glk+ max downscale factor is <3.0 for all pixel formats. Older platforms had a max of <2.0 for NV12. Update the code to deal with this. Reviewed-by: Maarten Lankhorst Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190913193157.9556-3-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 276adc3035641..1a72706ebee28 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -14556,7 +14556,7 @@ skl_max_scale(const struct intel_crtc_state *crtc_state, { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - int max_scale, mult; + int max_scale; int crtc_clock, max_dotclk, tmpclk1, tmpclk2; if (!crtc_state->base.enable) @@ -14577,8 +14577,11 @@ skl_max_scale(const struct intel_crtc_state *crtc_state, * or * cdclk/crtc_clock */ - mult = drm_format_info_is_yuv_semiplanar(format) ? 2 : 3; - tmpclk1 = (1 << 16) * mult - 1; + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv) || + !drm_format_info_is_yuv_semiplanar(format)) + tmpclk1 = 0x30000 - 1; + else + tmpclk1 = 0x20000 - 1; tmpclk2 = (1 << 8) * ((max_dotclk << 8) / crtc_clock); max_scale = min(tmpclk1, tmpclk2); From fe4709a8d033975a328a665a627acd35e628117a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 13 Sep 2019 22:31:56 +0300 Subject: [PATCH 175/331] drm/i915: Extract intel_modeset_calc_cdclk() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Exfiltrate the cdclk code from intel_modeset_checks() into intel_modeset_calc_cdclk(). Reviewed-by: Maarten Lankhorst Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190913193157.9556-4-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_cdclk.c | 135 ++++++++++++++++++- drivers/gpu/drm/i915/display/intel_cdclk.h | 6 +- drivers/gpu/drm/i915/display/intel_display.c | 123 +---------------- 3 files changed, 135 insertions(+), 129 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index ea3f75c72fe84..43564295b8647 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -21,6 +21,7 @@ * DEALINGS IN THE SOFTWARE. */ +#include "intel_atomic.h" #include "intel_cdclk.h" #include "intel_display_types.h" #include "intel_sideband.h" @@ -1772,9 +1773,9 @@ bool intel_cdclk_needs_modeset(const struct intel_cdclk_state *a, * Returns: * True if the CDCLK states require just a cd2x divider update, false if not. */ -bool intel_cdclk_needs_cd2x_update(struct drm_i915_private *dev_priv, - const struct intel_cdclk_state *a, - const struct intel_cdclk_state *b) +static bool intel_cdclk_needs_cd2x_update(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *a, + const struct intel_cdclk_state *b) { /* Older hw doesn't have the capability */ if (INTEL_GEN(dev_priv) < 10 && !IS_GEN9_LP(dev_priv)) @@ -1793,8 +1794,8 @@ bool intel_cdclk_needs_cd2x_update(struct drm_i915_private *dev_priv, * Returns: * True if the CDCLK states don't match, false if they do. */ -bool intel_cdclk_changed(const struct intel_cdclk_state *a, - const struct intel_cdclk_state *b) +static bool intel_cdclk_changed(const struct intel_cdclk_state *a, + const struct intel_cdclk_state *b) { return intel_cdclk_needs_modeset(a, b) || a->voltage_level != b->voltage_level; @@ -2220,6 +2221,130 @@ static int bxt_modeset_calc_cdclk(struct intel_atomic_state *state) return 0; } +static int intel_lock_all_pipes(struct intel_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct intel_crtc *crtc; + + /* Add all pipes to the state */ + for_each_intel_crtc(&dev_priv->drm, crtc) { + struct intel_crtc_state *crtc_state; + + crtc_state = intel_atomic_get_crtc_state(&state->base, crtc); + if (IS_ERR(crtc_state)) + return PTR_ERR(crtc_state); + } + + return 0; +} + +static int intel_modeset_all_pipes(struct intel_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct intel_crtc *crtc; + + /* + * Add all pipes to the state, and force + * a modeset on all the active ones. + */ + for_each_intel_crtc(&dev_priv->drm, crtc) { + struct intel_crtc_state *crtc_state; + int ret; + + crtc_state = intel_atomic_get_crtc_state(&state->base, crtc); + if (IS_ERR(crtc_state)) + return PTR_ERR(crtc_state); + + if (!crtc_state->base.active || + drm_atomic_crtc_needs_modeset(&crtc_state->base)) + continue; + + crtc_state->base.mode_changed = true; + + ret = drm_atomic_add_affected_connectors(&state->base, + &crtc->base); + if (ret) + return ret; + + ret = drm_atomic_add_affected_planes(&state->base, + &crtc->base); + if (ret) + return ret; + + crtc_state->update_planes |= crtc_state->active_planes; + } + + return 0; +} + +int intel_modeset_calc_cdclk(struct intel_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + enum pipe pipe; + int ret; + + if (!dev_priv->display.modeset_calc_cdclk) + return 0; + + ret = dev_priv->display.modeset_calc_cdclk(state); + if (ret) + return ret; + + /* + * Writes to dev_priv->cdclk.logical must protected by + * holding all the crtc locks, even if we don't end up + * touching the hardware + */ + if (intel_cdclk_changed(&dev_priv->cdclk.logical, + &state->cdclk.logical)) { + ret = intel_lock_all_pipes(state); + if (ret < 0) + return ret; + } + + if (is_power_of_2(state->active_pipes)) { + struct intel_crtc *crtc; + struct intel_crtc_state *crtc_state; + + pipe = ilog2(state->active_pipes); + crtc = intel_get_crtc_for_pipe(dev_priv, pipe); + crtc_state = intel_atomic_get_new_crtc_state(state, crtc); + if (crtc_state && + drm_atomic_crtc_needs_modeset(&crtc_state->base)) + pipe = INVALID_PIPE; + } else { + pipe = INVALID_PIPE; + } + + /* All pipes must be switched off while we change the cdclk. */ + if (pipe != INVALID_PIPE && + intel_cdclk_needs_cd2x_update(dev_priv, + &dev_priv->cdclk.actual, + &state->cdclk.actual)) { + ret = intel_lock_all_pipes(state); + if (ret) + return ret; + + state->cdclk.pipe = pipe; + } else if (intel_cdclk_needs_modeset(&dev_priv->cdclk.actual, + &state->cdclk.actual)) { + ret = intel_modeset_all_pipes(state); + if (ret) + return ret; + + state->cdclk.pipe = INVALID_PIPE; + } + + DRM_DEBUG_KMS("New cdclk calculated to be logical %u kHz, actual %u kHz\n", + state->cdclk.logical.cdclk, + state->cdclk.actual.cdclk); + DRM_DEBUG_KMS("New voltage level calculated to be logical %u, actual %u\n", + state->cdclk.logical.voltage_level, + state->cdclk.actual.voltage_level); + + return 0; +} + static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv) { int max_cdclk_freq = dev_priv->max_cdclk_freq; diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.h b/drivers/gpu/drm/i915/display/intel_cdclk.h index 1afa84ab60186..cf71394cc79cc 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.h +++ b/drivers/gpu/drm/i915/display/intel_cdclk.h @@ -29,13 +29,8 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv); void intel_update_max_cdclk(struct drm_i915_private *dev_priv); void intel_update_cdclk(struct drm_i915_private *dev_priv); void intel_update_rawclk(struct drm_i915_private *dev_priv); -bool intel_cdclk_needs_cd2x_update(struct drm_i915_private *dev_priv, - const struct intel_cdclk_state *a, - const struct intel_cdclk_state *b); bool intel_cdclk_needs_modeset(const struct intel_cdclk_state *a, const struct intel_cdclk_state *b); -bool intel_cdclk_changed(const struct intel_cdclk_state *a, - const struct intel_cdclk_state *b); void intel_cdclk_swap_state(struct intel_atomic_state *state); void intel_set_cdclk_pre_plane_update(struct drm_i915_private *dev_priv, @@ -49,5 +44,6 @@ intel_set_cdclk_post_plane_update(struct drm_i915_private *dev_priv, enum pipe pipe); void intel_dump_cdclk_state(const struct intel_cdclk_state *cdclk_state, const char *context); +int intel_modeset_calc_cdclk(struct intel_atomic_state *state); #endif /* __INTEL_CDCLK_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 1a72706ebee28..befebfd5db66b 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -13433,65 +13433,12 @@ static int haswell_mode_set_planes_workaround(struct intel_atomic_state *state) return 0; } -static int intel_lock_all_pipes(struct intel_atomic_state *state) -{ - struct drm_i915_private *dev_priv = to_i915(state->base.dev); - struct intel_crtc *crtc; - - /* Add all pipes to the state */ - for_each_intel_crtc(&dev_priv->drm, crtc) { - struct intel_crtc_state *crtc_state; - - crtc_state = intel_atomic_get_crtc_state(&state->base, crtc); - if (IS_ERR(crtc_state)) - return PTR_ERR(crtc_state); - } - - return 0; -} - -static int intel_modeset_all_pipes(struct intel_atomic_state *state) -{ - struct drm_i915_private *dev_priv = to_i915(state->base.dev); - struct intel_crtc *crtc; - - /* - * Add all pipes to the state, and force - * a modeset on all the active ones. - */ - for_each_intel_crtc(&dev_priv->drm, crtc) { - struct intel_crtc_state *crtc_state; - int ret; - - crtc_state = intel_atomic_get_crtc_state(&state->base, crtc); - if (IS_ERR(crtc_state)) - return PTR_ERR(crtc_state); - - if (!crtc_state->base.active || needs_modeset(crtc_state)) - continue; - - crtc_state->base.mode_changed = true; - - ret = drm_atomic_add_affected_connectors(&state->base, - &crtc->base); - if (ret) - return ret; - - ret = drm_atomic_add_affected_planes(&state->base, - &crtc->base); - if (ret) - return ret; - } - - return 0; -} - static int intel_modeset_checks(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); struct intel_crtc_state *old_crtc_state, *new_crtc_state; struct intel_crtc *crtc; - int ret = 0, i; + int ret, i; if (!check_digital_port_conflicts(state)) { DRM_DEBUG_KMS("rejecting conflicting digital port configuration\n"); @@ -13519,71 +13466,9 @@ static int intel_modeset_checks(struct intel_atomic_state *state) state->active_pipe_changes |= BIT(crtc->pipe); } - /* - * See if the config requires any additional preparation, e.g. - * to adjust global state with pipes off. We need to do this - * here so we can get the modeset_pipe updated config for the new - * mode set on this crtc. For other crtcs we need to use the - * adjusted_mode bits in the crtc directly. - */ - if (dev_priv->display.modeset_calc_cdclk) { - enum pipe pipe; - - ret = dev_priv->display.modeset_calc_cdclk(state); - if (ret < 0) - return ret; - - /* - * Writes to dev_priv->cdclk.logical must protected by - * holding all the crtc locks, even if we don't end up - * touching the hardware - */ - if (intel_cdclk_changed(&dev_priv->cdclk.logical, - &state->cdclk.logical)) { - ret = intel_lock_all_pipes(state); - if (ret < 0) - return ret; - } - - if (is_power_of_2(state->active_pipes)) { - struct intel_crtc *crtc; - struct intel_crtc_state *crtc_state; - - pipe = ilog2(state->active_pipes); - crtc = intel_get_crtc_for_pipe(dev_priv, pipe); - crtc_state = intel_atomic_get_new_crtc_state(state, crtc); - if (crtc_state && needs_modeset(crtc_state)) - pipe = INVALID_PIPE; - } else { - pipe = INVALID_PIPE; - } - - /* All pipes must be switched off while we change the cdclk. */ - if (pipe != INVALID_PIPE && - intel_cdclk_needs_cd2x_update(dev_priv, - &dev_priv->cdclk.actual, - &state->cdclk.actual)) { - ret = intel_lock_all_pipes(state); - if (ret < 0) - return ret; - - state->cdclk.pipe = pipe; - } else if (intel_cdclk_needs_modeset(&dev_priv->cdclk.actual, - &state->cdclk.actual)) { - ret = intel_modeset_all_pipes(state); - if (ret < 0) - return ret; - - state->cdclk.pipe = INVALID_PIPE; - } - - DRM_DEBUG_KMS("New cdclk calculated to be logical %u kHz, actual %u kHz\n", - state->cdclk.logical.cdclk, - state->cdclk.actual.cdclk); - DRM_DEBUG_KMS("New voltage level calculated to be logical %u, actual %u\n", - state->cdclk.logical.voltage_level, - state->cdclk.actual.voltage_level); - } + ret = intel_modeset_calc_cdclk(state); + if (ret) + return ret; intel_modeset_clear_plls(state); From 2e7f76c1e4b6480226cee3f6e358172891012882 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 13 Sep 2019 22:31:57 +0300 Subject: [PATCH 176/331] drm/i915: s/pipe_config/crtc_state/ in intel_crtc_atomic_check() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clean up the mess with the drm vs. intel types in intel_crtc_atomic_check() and rename varibles accordingly. Reviewed-by: Maarten Lankhorst Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190913193157.9556-5-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 54 ++++++++++---------- 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index befebfd5db66b..12bb8f951edf6 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -11760,25 +11760,24 @@ static bool c8_planes_changed(const struct intel_crtc_state *new_crtc_state) return !old_crtc_state->c8_planes != !new_crtc_state->c8_planes; } -static int intel_crtc_atomic_check(struct drm_crtc *crtc, - struct drm_crtc_state *crtc_state) +static int intel_crtc_atomic_check(struct drm_crtc *_crtc, + struct drm_crtc_state *_crtc_state) { - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct intel_crtc_state *pipe_config = - to_intel_crtc_state(crtc_state); + struct intel_crtc *crtc = to_intel_crtc(_crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(_crtc_state); int ret; - bool mode_changed = needs_modeset(pipe_config); + bool mode_changed = needs_modeset(crtc_state); if (INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv) && - mode_changed && !crtc_state->active) - pipe_config->update_wm_post = true; + mode_changed && !crtc_state->base.active) + crtc_state->update_wm_post = true; - if (mode_changed && crtc_state->enable && + if (mode_changed && crtc_state->base.enable && dev_priv->display.crtc_compute_clock && - !WARN_ON(pipe_config->shared_dpll)) { - ret = dev_priv->display.crtc_compute_clock(intel_crtc, - pipe_config); + !WARN_ON(crtc_state->shared_dpll)) { + ret = dev_priv->display.crtc_compute_clock(crtc, crtc_state); if (ret) return ret; } @@ -11787,19 +11786,19 @@ static int intel_crtc_atomic_check(struct drm_crtc *crtc, * May need to update pipe gamma enable bits * when C8 planes are getting enabled/disabled. */ - if (c8_planes_changed(pipe_config)) - crtc_state->color_mgmt_changed = true; + if (c8_planes_changed(crtc_state)) + crtc_state->base.color_mgmt_changed = true; - if (mode_changed || pipe_config->update_pipe || - crtc_state->color_mgmt_changed) { - ret = intel_color_check(pipe_config); + if (mode_changed || crtc_state->update_pipe || + crtc_state->base.color_mgmt_changed) { + ret = intel_color_check(crtc_state); if (ret) return ret; } ret = 0; if (dev_priv->display.compute_pipe_wm) { - ret = dev_priv->display.compute_pipe_wm(pipe_config); + ret = dev_priv->display.compute_pipe_wm(crtc_state); if (ret) { DRM_DEBUG_KMS("Target pipe watermarks are invalid\n"); return ret; @@ -11815,7 +11814,7 @@ static int intel_crtc_atomic_check(struct drm_crtc *crtc, * old state and the new state. We can program these * immediately. */ - ret = dev_priv->display.compute_intermediate_wm(pipe_config); + ret = dev_priv->display.compute_intermediate_wm(crtc_state); if (ret) { DRM_DEBUG_KMS("No valid intermediate pipe watermarks are possible\n"); return ret; @@ -11823,21 +11822,20 @@ static int intel_crtc_atomic_check(struct drm_crtc *crtc, } if (INTEL_GEN(dev_priv) >= 9) { - if (mode_changed || pipe_config->update_pipe) - ret = skl_update_scaler_crtc(pipe_config); + if (mode_changed || crtc_state->update_pipe) + ret = skl_update_scaler_crtc(crtc_state); if (!ret) - ret = icl_check_nv12_planes(pipe_config); + ret = icl_check_nv12_planes(crtc_state); if (!ret) - ret = skl_check_pipe_max_pixel_rate(intel_crtc, - pipe_config); + ret = skl_check_pipe_max_pixel_rate(crtc, crtc_state); if (!ret) - ret = intel_atomic_setup_scalers(dev_priv, intel_crtc, - pipe_config); + ret = intel_atomic_setup_scalers(dev_priv, crtc, + crtc_state); } if (HAS_IPS(dev_priv)) - pipe_config->ips_enabled = hsw_compute_ips_config(pipe_config); + crtc_state->ips_enabled = hsw_compute_ips_config(crtc_state); return ret; } From ef404bc6592075925997d3626e968e4bcd59721f Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 16 Sep 2019 12:29:01 +0300 Subject: [PATCH 177/331] drm/i915: stop conflating HAS_DISPLAY() and disabled display MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stop setting ->pipe_mask to zero when display is disabled, allowing us to have different code paths for not actually having display hardware, and having display hardware disabled. This lets us develop those two avenues independently. There are no functional changes for when there is no display. However, all uses of for_each_pipe() and for_each_pipe_masked() will start running for the disabled display case. Put one of the more significant ones behind checks for INTEL_DISPLAY_ENABLED(), otherwise the cases should not be hit with disabled display, or they seem benign. Fingers crossed. All in all, this might not be the ideal solution. In fact we may have had something along the lines of this in the past, but we ended up conflating the two cases. Possibly even by recommendation by yours truly; I did not dare dig up that part of the history. But the perfect is the enemy of the good, this is a straightforward change, and lets us get actual work done in both fronts without interfering with each other. Cc: Chris Wilson Cc: José Roberto de Souza Cc: Ville Syrjälä Reviewed-by: Chris Wilson Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190916092901.31440-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 12 +++++++----- drivers/gpu/drm/i915/intel_device_info.c | 8 ++------ 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 12bb8f951edf6..1cc74844d3ea6 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -16167,11 +16167,13 @@ int intel_modeset_init(struct drm_device *dev) INTEL_NUM_PIPES(dev_priv), INTEL_NUM_PIPES(dev_priv) > 1 ? "s" : ""); - for_each_pipe(dev_priv, pipe) { - ret = intel_crtc_init(dev_priv, pipe); - if (ret) { - drm_mode_config_cleanup(dev); - return ret; + if (HAS_DISPLAY(dev_priv) && INTEL_DISPLAY_ENABLED(dev_priv)) { + for_each_pipe(dev_priv, pipe) { + ret = intel_crtc_init(dev_priv, pipe); + if (ret) { + drm_mode_config_cleanup(dev); + return ret; + } } } diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 727089dcd280a..728c881718a2d 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -894,12 +894,8 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) runtime->num_sprites[pipe] = 1; } - if (i915_modparams.disable_display) { - DRM_INFO("Display disabled (module parameter)\n"); - info->pipe_mask = 0; - } else if (HAS_DISPLAY(dev_priv) && - (IS_GEN_RANGE(dev_priv, 7, 8)) && - HAS_PCH_SPLIT(dev_priv)) { + if (HAS_DISPLAY(dev_priv) && IS_GEN_RANGE(dev_priv, 7, 8) && + HAS_PCH_SPLIT(dev_priv)) { u32 fuse_strap = I915_READ(FUSE_STRAP); u32 sfuse_strap = I915_READ(SFUSE_STRAP); From bb120e1171a966429287865134265811887daa6c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 15 Sep 2019 21:37:00 +0100 Subject: [PATCH 178/331] drm/i915: Show the logical context ring state on dumping Include the active context register state when dumping the engine. Suggested-by: Mika Kuoppala Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Stuart Summers Link: https://patchwork.freedesktop.org/patch/msgid/20190915203701.29163-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index a8014c59b3889..3c176b0f4b453 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1404,6 +1404,11 @@ void intel_engine_dump(struct intel_engine_cs *engine, rq->timeline->hwsp_offset); print_request_ring(m, rq); + + if (rq->hw_context->lrc_reg_state) { + drm_printf(m, "Logical Ring Context:\n"); + hexdump(m, rq->hw_context->lrc_reg_state, PAGE_SIZE); + } } spin_unlock_irqrestore(&engine->active.lock, flags); From 80fa64d62067ea7390cd27d924dcb7c028e455e9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 17 Sep 2019 09:00:29 +0100 Subject: [PATCH 179/331] drm/i915: Only apply a rmw mmio update if the value changes If we try to clear, or even set, a bit in the register that doesn't change the register state; skip the write. There's a slight danger in that the register acts as a latch-on-write, but I do not think we use a rmw cycle with any such latch registers. Suggested-by: Daniele Ceraolo Spurio Signed-off-by: Chris Wilson Cc: Daniele Ceraolo Spurio Cc: Tvrtko Ursulin Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190917080029.27632-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_uncore.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index 414fc2cb04597..dcfa243892c67 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -378,23 +378,23 @@ intel_uncore_read64_2x32(struct intel_uncore *uncore, static inline void intel_uncore_rmw(struct intel_uncore *uncore, i915_reg_t reg, u32 clear, u32 set) { - u32 val; + u32 old, val; - val = intel_uncore_read(uncore, reg); - val &= ~clear; - val |= set; - intel_uncore_write(uncore, reg, val); + old = intel_uncore_read(uncore, reg); + val = (old & ~clear) | set; + if (val != old) + intel_uncore_write(uncore, reg, val); } static inline void intel_uncore_rmw_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 clear, u32 set) { - u32 val; + u32 old, val; - val = intel_uncore_read_fw(uncore, reg); - val &= ~clear; - val |= set; - intel_uncore_write_fw(uncore, reg, val); + old = intel_uncore_read_fw(uncore, reg); + val = (old & ~clear) | set; + if (val != old) + intel_uncore_write_fw(uncore, reg, val); } static inline int intel_uncore_write_and_verify(struct intel_uncore *uncore, From c210e85b8f3371168ce78c8da00b913839a84ec7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 17 Sep 2019 13:30:55 +0100 Subject: [PATCH 180/331] drm/i915/tgl: Extend MI_SEMAPHORE_WAIT On Tigerlake, MI_SEMAPHORE_WAIT grew an extra dword, so be sure to update the length field and emit that extra parameter and any padding noop as required. v2: Define the token shift while we are adding the updated MI_SEMAPHORE_WAIT v3: Use int instead of bool in the addition so that readers are not left wondering about the intricacies of the C spec. Now they just have to worry what the integer value of a boolean operation is... Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Daniele Ceraolo Spurio Cc: Michal Winiarski Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190917123055.28965-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 3 + drivers/gpu/drm/i915/gt/intel_lrc.c | 71 ++++++++++++++++++-- drivers/gpu/drm/i915/i915_pci.c | 1 - drivers/gpu/drm/i915/i915_request.c | 21 ++++-- 4 files changed, 84 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index fbad403ab7ac0..f78b13d74e17a 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -112,6 +112,7 @@ #define MI_SEMAPHORE_SIGNAL MI_INSTR(0x1b, 0) /* GEN8+ */ #define MI_SEMAPHORE_TARGET(engine) ((engine)<<15) #define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */ +#define MI_SEMAPHORE_WAIT_TOKEN MI_INSTR(0x1c, 3) /* GEN12+ */ #define MI_SEMAPHORE_POLL (1 << 15) #define MI_SEMAPHORE_SAD_GT_SDD (0 << 12) #define MI_SEMAPHORE_SAD_GTE_SDD (1 << 12) @@ -119,6 +120,8 @@ #define MI_SEMAPHORE_SAD_LTE_SDD (3 << 12) #define MI_SEMAPHORE_SAD_EQ_SDD (4 << 12) #define MI_SEMAPHORE_SAD_NEQ_SDD (5 << 12) +#define MI_SEMAPHORE_TOKEN_MASK REG_GENMASK(9, 5) +#define MI_SEMAPHORE_TOKEN_SHIFT 5 #define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1) #define MI_STORE_DWORD_IMM_GEN4 MI_INSTR(0x20, 2) #define MI_MEM_VIRTUAL (1 << 22) /* 945,g33,965 */ diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index a3f0e49997440..a99166a2d2eb9 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -2879,6 +2879,22 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) return gen8_emit_fini_breadcrumb_footer(request, cs); } +static u32 * +gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) +{ + cs = gen8_emit_ggtt_write_rcs(cs, + request->fence.seqno, + request->timeline->hwsp_offset, + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_TILE_CACHE_FLUSH | + PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_DC_FLUSH_ENABLE | + PIPE_CONTROL_FLUSH_ENABLE); + + return gen8_emit_fini_breadcrumb_footer(request, cs); +} + /* * Note that the CS instruction pre-parser will not stall on the breadcrumb * flush and will continue pre-fetching the instructions after it before the @@ -2897,8 +2913,49 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) * All the above applies only to the instructions themselves. Non-inline data * used by the instructions is not pre-fetched. */ -static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, - u32 *cs) + +static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs) +{ + *cs++ = MI_SEMAPHORE_WAIT_TOKEN | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_EQ_SDD; + *cs++ = 0; + *cs++ = intel_hws_preempt_address(request->engine); + *cs++ = 0; + *cs++ = 0; + *cs++ = MI_NOOP; + + return cs; +} + +static __always_inline u32* +gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs) +{ + *cs++ = MI_USER_INTERRUPT; + + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + if (intel_engine_has_semaphores(request->engine)) + cs = gen12_emit_preempt_busywait(request, cs); + + request->tail = intel_ring_offset(request, cs); + assert_ring_tail_valid(request->ring, request->tail); + + return gen8_emit_wa_tail(request, cs); +} + +static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) +{ + cs = gen8_emit_ggtt_write(cs, + request->fence.seqno, + request->timeline->hwsp_offset, + 0); + + return gen12_emit_fini_breadcrumb_footer(request, cs); +} + +static u32 * +gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) { cs = gen8_emit_ggtt_write_rcs(cs, request->fence.seqno, @@ -2910,7 +2967,7 @@ static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, PIPE_CONTROL_DC_FLUSH_ENABLE | PIPE_CONTROL_FLUSH_ENABLE); - return gen8_emit_fini_breadcrumb_footer(request, cs); + return gen12_emit_fini_breadcrumb_footer(request, cs); } static void execlists_park(struct intel_engine_cs *engine) @@ -2939,9 +2996,6 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine) engine->flags |= I915_ENGINE_HAS_PREEMPTION; } - if (INTEL_GEN(engine->i915) >= 12) /* XXX disabled for debugging */ - engine->flags &= ~I915_ENGINE_HAS_SEMAPHORES; - if (engine->class != COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) >= 12) engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO; } @@ -2971,6 +3025,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) engine->emit_flush = gen8_emit_flush; engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb; + if (INTEL_GEN(engine->i915) >= 12) + engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb; engine->set_default_submission = intel_execlists_set_default_submission; @@ -3016,6 +3072,9 @@ static void rcs_submission_override(struct intel_engine_cs *engine) { switch (INTEL_GEN(engine->i915)) { case 12: + engine->emit_flush = gen11_emit_flush_render; + engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs; + break; case 11: engine->emit_flush = gen11_emit_flush_render; engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs; diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index e4a26bbd87882..fe6941c8fc99d 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -797,7 +797,6 @@ static const struct intel_device_info intel_tigerlake_12_info = { .display.has_modular_fia = 1, .engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2), - .has_logical_ring_preemption = false, /* XXX disabled for debugging */ .engine_mask = BIT(RCS0), /* XXX reduced for debugging */ }; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 754a78364a633..3ecf92aa5fc12 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -783,7 +783,9 @@ emit_semaphore_wait(struct i915_request *to, struct i915_request *from, gfp_t gfp) { + const int has_token = INTEL_GEN(to->i915) >= 12; u32 hwsp_offset; + int len; u32 *cs; int err; @@ -810,7 +812,11 @@ emit_semaphore_wait(struct i915_request *to, if (err) return err; - cs = intel_ring_begin(to, 4); + len = 4; + if (has_token) + len += 2; + + cs = intel_ring_begin(to, len); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -822,13 +828,18 @@ emit_semaphore_wait(struct i915_request *to, * (post-wrap) values than they were expecting (and so wait * forever). */ - *cs++ = MI_SEMAPHORE_WAIT | - MI_SEMAPHORE_GLOBAL_GTT | - MI_SEMAPHORE_POLL | - MI_SEMAPHORE_SAD_GTE_SDD; + *cs++ = (MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_GTE_SDD) + + has_token; *cs++ = from->fence.seqno; *cs++ = hwsp_offset; *cs++ = 0; + if (has_token) { + *cs++ = 0; + *cs++ = MI_NOOP; + } intel_ring_advance(to, cs); to->sched.semaphores |= from->engine->mask; From 8698ba53cd7173c32320ebbef4d389d41ebb5780 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 16 Sep 2019 16:32:51 -0700 Subject: [PATCH 181/331] drm/i915/cml: Add second PCH ID for CMP The CMP PCH ID we have in the driver is correct for the CML-U machines we have in our CI system, but the CML-S and CML-H CI machines appear to use a different PCH ID, leading our driver to detect no PCH for them. Cc: Rodrigo Vivi Cc: Anusha Srivatsa References: 729ae330a0f2e2 ("drm/i915/cml: Introduce Comet Lake PCH") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111461 Signed-off-by: Matt Roper Acked-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190916233251.387-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/intel_pch.c | 1 + drivers/gpu/drm/i915/intel_pch.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_pch.c b/drivers/gpu/drm/i915/intel_pch.c index fa864d8f2b731..15f8bff141f91 100644 --- a/drivers/gpu/drm/i915/intel_pch.c +++ b/drivers/gpu/drm/i915/intel_pch.c @@ -69,6 +69,7 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id) WARN_ON(!IS_CANNONLAKE(dev_priv) && !IS_COFFEELAKE(dev_priv)); return PCH_CNP; case INTEL_PCH_CMP_DEVICE_ID_TYPE: + case INTEL_PCH_CMP2_DEVICE_ID_TYPE: DRM_DEBUG_KMS("Found Comet Lake PCH (CMP)\n"); WARN_ON(!IS_COFFEELAKE(dev_priv)); /* CometPoint is CNP Compatible */ diff --git a/drivers/gpu/drm/i915/intel_pch.h b/drivers/gpu/drm/i915/intel_pch.h index e6a2d65f19c62..c29c81ec7971c 100644 --- a/drivers/gpu/drm/i915/intel_pch.h +++ b/drivers/gpu/drm/i915/intel_pch.h @@ -41,6 +41,7 @@ enum intel_pch { #define INTEL_PCH_CNP_DEVICE_ID_TYPE 0xA300 #define INTEL_PCH_CNP_LP_DEVICE_ID_TYPE 0x9D80 #define INTEL_PCH_CMP_DEVICE_ID_TYPE 0x0280 +#define INTEL_PCH_CMP2_DEVICE_ID_TYPE 0x0680 #define INTEL_PCH_ICP_DEVICE_ID_TYPE 0x3480 #define INTEL_PCH_MCC_DEVICE_ID_TYPE 0x4B00 #define INTEL_PCH_MCC2_DEVICE_ID_TYPE 0x3880 From 56c05de6bd773b96deca379370965c49042b5fbf Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 17 Sep 2019 20:47:46 +0100 Subject: [PATCH 182/331] drm/i915: Extend Haswell GT1 PSMI workaround to all A few times in CI, we have detected a GPU hang on our Haswell GT2 systems with the characteristic IPEHR of 0x780c0000. When the PSMI w/a was first introducted, it was applied to all Haswell, but later on we found an erratum that supposedly restricted the issue to GT1 and so constrained it only be applied on GT1. That may have been a mistake... Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111692 Fixes: 167bc759e823 ("drm/i915: Restrict PSMI context load w/a to Haswell GT1") References: 2c550183476d ("drm/i915: Disable PSMI sleep messages on all rings around context switches") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Acked-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190917194746.26710-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index a73296e6b13d5..a25b84b12ef1d 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -1574,7 +1574,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) struct intel_engine_cs *engine = rq->engine; enum intel_engine_id id; const int num_engines = - IS_HSW_GT1(i915) ? RUNTIME_INFO(i915)->num_engines - 1 : 0; + IS_HASWELL(i915) ? RUNTIME_INFO(i915)->num_engines - 1 : 0; bool force_restore = false; int len; u32 *cs; From 0d333ac7eb17f0f6c86db66afba70135f11da4a9 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Wed, 18 Sep 2019 18:53:30 -0700 Subject: [PATCH 183/331] drm/i915: fix SFC reset flow Our assumption that the we can ask the HW to lock the SFC even if not currently in use does not match the HW commitment. The expectation from the HW is that SW will not try to lock the SFC if the engine is not using it and if we do that the behavior is undefined; on ICL the HW ends up to returning the ack and ignoring our lock request, but this is not guaranteed and we shouldn't expect it going forward. Also, failing to get the ack while the SFC is in use means that we can't cleanly reset it, so fail the engine reset in that scenario. v2: drop rmw change, keep the log as debug and handle failure (Chris), improve comments (Tvrtko). Reported-by: Owen Zhang Signed-off-by: Daniele Ceraolo Spurio Cc: Tvrtko Ursulin Cc: Chris Wilson Reviewed-by: Tvrtko Ursulin Acked-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190919015330.15435-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/gt/intel_reset.c | 50 ++++++++++++++++++--------- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 8327220ac5585..89048ca8924cd 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -309,7 +309,7 @@ static int gen6_reset_engines(struct intel_gt *gt, return gen6_hw_domain_reset(gt, hw_mask); } -static u32 gen11_lock_sfc(struct intel_engine_cs *engine) +static int gen11_lock_sfc(struct intel_engine_cs *engine, u32 *hw_mask) { struct intel_uncore *uncore = engine->uncore; u8 vdbox_sfc_access = RUNTIME_INFO(engine->i915)->vdbox_sfc_access; @@ -318,6 +318,7 @@ static u32 gen11_lock_sfc(struct intel_engine_cs *engine) i915_reg_t sfc_usage; u32 sfc_usage_bit; u32 sfc_reset_bit; + int ret; switch (engine->class) { case VIDEO_DECODE_CLASS: @@ -352,27 +353,33 @@ static u32 gen11_lock_sfc(struct intel_engine_cs *engine) } /* - * Tell the engine that a software reset is going to happen. The engine - * will then try to force lock the SFC (if currently locked, it will - * remain so until we tell the engine it is safe to unlock; if currently - * unlocked, it will ignore this and all new lock requests). If SFC - * ends up being locked to the engine we want to reset, we have to reset - * it as well (we will unlock it once the reset sequence is completed). + * If the engine is using a SFC, tell the engine that a software reset + * is going to happen. The engine will then try to force lock the SFC. + * If SFC ends up being locked to the engine we want to reset, we have + * to reset it as well (we will unlock it once the reset sequence is + * completed). */ + if (!(intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit)) + return 0; + rmw_set_fw(uncore, sfc_forced_lock, sfc_forced_lock_bit); - if (__intel_wait_for_register_fw(uncore, - sfc_forced_lock_ack, - sfc_forced_lock_ack_bit, - sfc_forced_lock_ack_bit, - 1000, 0, NULL)) { - DRM_DEBUG_DRIVER("Wait for SFC forced lock ack failed\n"); + ret = __intel_wait_for_register_fw(uncore, + sfc_forced_lock_ack, + sfc_forced_lock_ack_bit, + sfc_forced_lock_ack_bit, + 1000, 0, NULL); + + /* Was the SFC released while we were trying to lock it? */ + if (!(intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit)) return 0; - } - if (intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit) - return sfc_reset_bit; + if (ret) { + DRM_DEBUG_DRIVER("Wait for SFC forced lock ack failed\n"); + return ret; + } + *hw_mask |= sfc_reset_bit; return 0; } @@ -430,12 +437,21 @@ static int gen11_reset_engines(struct intel_gt *gt, for_each_engine_masked(engine, gt->i915, engine_mask, tmp) { GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask)); hw_mask |= hw_engine_mask[engine->id]; - hw_mask |= gen11_lock_sfc(engine); + ret = gen11_lock_sfc(engine, &hw_mask); + if (ret) + goto sfc_unlock; } } ret = gen6_hw_domain_reset(gt, hw_mask); +sfc_unlock: + /* + * We unlock the SFC based on the lock status and not the result of + * gen11_lock_sfc to make sure that we clean properly if something + * wrong happened during the lock (e.g. lock acquired after timeout + * expiration). + */ if (engine_mask != ALL_ENGINES) for_each_engine_masked(engine, gt->i915, engine_mask, tmp) gen11_unlock_sfc(engine); From 37fa0de3c137d5f54f7e64f53495c9d501d42a4d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 18 Sep 2019 15:54:50 +0100 Subject: [PATCH 184/331] drm/i915: Verify the engine after acquiring the active.lock When using virtual engines, the rq->engine is not stable until we hold the engine->active.lock (as the virtual engine may be exchanged with the sibling). Since commit 22b7a426bbe1 ("drm/i915/execlists: Preempt-to-busy") we may retire a request concurrently with resubmitting it to HW, we need to be extra careful to verify we are holding the correct lock for the request's active list. This is similar to the issue we saw with rescheduling the virtual requests, see sched_lock_engine(). Or else: <4> [876.736126] list_add corruption. prev->next should be next (ffff8883f931a1f8), but was dead000000000100. (prev=ffff888361ffa610). <4> [876.736136] WARNING: CPU: 2 PID: 21 at lib/list_debug.c:28 __list_add_valid+0x4d/0x70 <4> [876.736137] Modules linked in: i915(+) amdgpu gpu_sched ttm vgem snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic mei_hdcp x86_pkg_temp_thermal coretemp crct10dif_pclmul crc32_pclmul snd_intel_nhlt snd_hda_codec snd_hwdep snd_hda_core ghash_clmulni_intel e1000e cdc_ether usbnet mii snd_pcm ptp pps_core mei_me mei prime_numbers btusb btrtl btbcm btintel bluetooth ecdh_generic ecc [last unloaded: i915] <4> [876.736154] CPU: 2 PID: 21 Comm: ksoftirqd/2 Tainted: G U 5.3.0-CI-CI_DRM_6898+ #1 <4> [876.736156] Hardware name: Intel Corporation Ice Lake Client Platform/IceLake U DDR4 SODIMM PD RVP TLC, BIOS ICLSFWR1.R00.3183.A00.1905020411 05/02/2019 <4> [876.736157] RIP: 0010:__list_add_valid+0x4d/0x70 <4> [876.736159] Code: c3 48 89 d1 48 c7 c7 20 33 0e 82 48 89 c2 e8 4a 4a bc ff 0f 0b 31 c0 c3 48 89 c1 4c 89 c6 48 c7 c7 70 33 0e 82 e8 33 4a bc ff <0f> 0b 31 c0 c3 48 89 f2 4c 89 c1 48 89 fe 48 c7 c7 c0 33 0e 82 e8 <4> [876.736160] RSP: 0018:ffffc9000018bd30 EFLAGS: 00010082 <4> [876.736162] RAX: 0000000000000000 RBX: ffff888361ffc840 RCX: 0000000000000104 <4> [876.736163] RDX: 0000000080000104 RSI: 0000000000000000 RDI: 00000000ffffffff <4> [876.736164] RBP: ffffc9000018bd68 R08: 0000000000000000 R09: 0000000000000001 <4> [876.736165] R10: 00000000aed95de3 R11: 000000007fe927eb R12: ffff888361ffca10 <4> [876.736166] R13: ffff888361ffa610 R14: ffff888361ffc880 R15: ffff8883f931a1f8 <4> [876.736168] FS: 0000000000000000(0000) GS:ffff88849fd00000(0000) knlGS:0000000000000000 <4> [876.736169] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4> [876.736170] CR2: 00007f093a9173c0 CR3: 00000003bba08005 CR4: 0000000000760ee0 <4> [876.736171] PKRU: 55555554 <4> [876.736172] Call Trace: <4> [876.736226] __i915_request_submit+0x152/0x370 [i915] <4> [876.736263] __execlists_submission_tasklet+0x6da/0x1f50 [i915] <4> [876.736293] ? execlists_submission_tasklet+0x29/0x50 [i915] <4> [876.736321] execlists_submission_tasklet+0x34/0x50 [i915] <4> [876.736325] tasklet_action_common.isra.5+0x47/0xb0 <4> [876.736328] __do_softirq+0xd8/0x4ae <4> [876.736332] ? smpboot_thread_fn+0x23/0x280 <4> [876.736334] ? smpboot_thread_fn+0x6b/0x280 <4> [876.736336] run_ksoftirqd+0x2b/0x50 <4> [876.736338] smpboot_thread_fn+0x1d3/0x280 <4> [876.736341] ? sort_range+0x20/0x20 <4> [876.736343] kthread+0x119/0x130 <4> [876.736345] ? kthread_park+0xa0/0xa0 <4> [876.736347] ret_from_fork+0x24/0x50 <4> [876.736353] irq event stamp: 2290145 <4> [876.736356] hardirqs last enabled at (2290144): [] __slab_free+0x3e8/0x500 <4> [876.736358] hardirqs last disabled at (2290145): [] _raw_spin_lock_irqsave+0xd/0x50 <4> [876.736360] softirqs last enabled at (2290114): [] __do_softirq+0x33e/0x4ae <4> [876.736361] softirqs last disabled at (2290119): [] run_ksoftirqd+0x2b/0x50 <4> [876.736363] WARNING: CPU: 2 PID: 21 at lib/list_debug.c:28 __list_add_valid+0x4d/0x70 <4> [876.736364] ---[ end trace 3e58d6c7356c65bf ]--- <4> [876.736406] ------------[ cut here ]------------ <4> [876.736415] list_del corruption. prev->next should be ffff888361ffca10, but was ffff88840ac2c730 <4> [876.736421] WARNING: CPU: 2 PID: 5490 at lib/list_debug.c:53 __list_del_entry_valid+0x79/0x90 <4> [876.736422] Modules linked in: i915(+) amdgpu gpu_sched ttm vgem snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic mei_hdcp x86_pkg_temp_thermal coretemp crct10dif_pclmul crc32_pclmul snd_intel_nhlt snd_hda_codec snd_hwdep snd_hda_core ghash_clmulni_intel e1000e cdc_ether usbnet mii snd_pcm ptp pps_core mei_me mei prime_numbers btusb btrtl btbcm btintel bluetooth ecdh_generic ecc [last unloaded: i915] <4> [876.736433] CPU: 2 PID: 5490 Comm: i915_selftest Tainted: G U W 5.3.0-CI-CI_DRM_6898+ #1 <4> [876.736435] Hardware name: Intel Corporation Ice Lake Client Platform/IceLake U DDR4 SODIMM PD RVP TLC, BIOS ICLSFWR1.R00.3183.A00.1905020411 05/02/2019 <4> [876.736436] RIP: 0010:__list_del_entry_valid+0x79/0x90 <4> [876.736438] Code: 0b 31 c0 c3 48 89 fe 48 c7 c7 30 34 0e 82 e8 ae 49 bc ff 0f 0b 31 c0 c3 48 89 f2 48 89 fe 48 c7 c7 68 34 0e 82 e8 97 49 bc ff <0f> 0b 31 c0 c3 48 c7 c7 a8 34 0e 82 e8 86 49 bc ff 0f 0b 31 c0 c3 <4> [876.736439] RSP: 0018:ffffc900003ef758 EFLAGS: 00010086 <4> [876.736440] RAX: 0000000000000000 RBX: ffff888361ffc840 RCX: 0000000000000002 <4> [876.736442] RDX: 0000000080000002 RSI: 0000000000000000 RDI: 00000000ffffffff <4> [876.736443] RBP: ffffc900003ef780 R08: 0000000000000000 R09: 0000000000000001 <4> [876.736444] R10: 000000001418e4b7 R11: 000000007f0ea93b R12: ffff888361ffcab8 <4> [876.736445] R13: ffff88843b6d0000 R14: 000000000000217c R15: 0000000000000001 <4> [876.736447] FS: 00007f4e6f255240(0000) GS:ffff88849fd00000(0000) knlGS:0000000000000000 <4> [876.736448] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4> [876.736449] CR2: 00007f093a9173c0 CR3: 00000003bba08005 CR4: 0000000000760ee0 <4> [876.736450] PKRU: 55555554 <4> [876.736451] Call Trace: <4> [876.736488] i915_request_retire+0x224/0x8e0 [i915] <4> [876.736521] i915_request_create+0x4b/0x1b0 [i915] <4> [876.736550] nop_virtual_engine+0x230/0x4d0 [i915] Fixes: 22b7a426bbe1 ("drm/i915/execlists: Preempt-to-busy") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111695 Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Cc: Matthew Auld Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190918145453.8800-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 3ecf92aa5fc12..606acde47fa04 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -195,6 +195,27 @@ static void free_capture_list(struct i915_request *request) } } +static void remove_from_engine(struct i915_request *rq) +{ + struct intel_engine_cs *engine, *locked; + + /* + * Virtual engines complicate acquiring the engine timeline lock, + * as their rq->engine pointer is not stable until under that + * engine lock. The simple ploy we use is to take the lock then + * check that the rq still belongs to the newly locked engine. + */ + locked = READ_ONCE(rq->engine); + spin_lock(&locked->active.lock); + while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) { + spin_unlock(&locked->active.lock); + spin_lock(&engine->active.lock); + locked = engine; + } + list_del(&rq->sched.link); + spin_unlock(&locked->active.lock); +} + static bool i915_request_retire(struct i915_request *rq) { struct i915_active_request *active, *next; @@ -260,9 +281,7 @@ static bool i915_request_retire(struct i915_request *rq) * request that we have removed from the HW and put back on a run * queue. */ - spin_lock(&rq->engine->active.lock); - list_del(&rq->sched.link); - spin_unlock(&rq->engine->active.lock); + remove_from_engine(rq); spin_lock(&rq->lock); i915_request_mark_complete(rq); From a47e788c2310a59b8e42e0bfc18d810118dff7bf Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 19 Sep 2019 14:14:14 +0100 Subject: [PATCH 185/331] drm/i915/selftests: Exercise CS TLB invalidation Check that we are correctly invalidating the TLB at the start of a batch after updating the GTT. v2: Comments and hold the request reference while spinning Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Daniele Ceraolo Spurio Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190919131414.7495-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 308 ++++++++++++++++++ 1 file changed, 308 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 598c18d10640a..0d40e0b429230 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -25,13 +25,16 @@ #include #include +#include "gem/i915_gem_context.h" #include "gem/selftests/mock_context.h" +#include "gt/intel_context.h" #include "i915_random.h" #include "i915_selftest.h" #include "mock_drm.h" #include "mock_gem_device.h" +#include "igt_flush_test.h" static void cleanup_freed_objects(struct drm_i915_private *i915) { @@ -1705,6 +1708,310 @@ int i915_gem_gtt_mock_selftests(void) return err; } +static int context_sync(struct intel_context *ce) +{ + struct i915_request *rq; + long timeout; + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + i915_request_get(rq); + i915_request_add(rq); + + timeout = i915_request_wait(rq, 0, HZ / 5); + i915_request_put(rq); + + return timeout < 0 ? -EIO : 0; +} + +static struct i915_request * +submit_batch(struct intel_context *ce, u64 addr) +{ + struct i915_request *rq; + int err; + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return rq; + + err = 0; + if (rq->engine->emit_init_breadcrumb) /* detect a hang */ + err = rq->engine->emit_init_breadcrumb(rq); + if (err == 0) + err = rq->engine->emit_bb_start(rq, addr, 0, 0); + + if (err == 0) + i915_request_get(rq); + i915_request_add(rq); + + return err ? ERR_PTR(err) : rq; +} + +static u32 *spinner(u32 *batch, int i) +{ + return batch + i * 64 / sizeof(*batch) + 4; +} + +static void end_spin(u32 *batch, int i) +{ + *spinner(batch, i) = MI_BATCH_BUFFER_END; + wmb(); +} + +static int igt_cs_tlb(void *arg) +{ + const unsigned int count = PAGE_SIZE / 64; + const unsigned int chunk_size = count * PAGE_SIZE; + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *bbe, *act, *out; + struct i915_gem_engines_iter it; + struct i915_address_space *vm; + struct i915_gem_context *ctx; + struct intel_context *ce; + struct drm_file *file; + struct i915_vma *vma; + unsigned int i; + u32 *result; + u32 *batch; + int err = 0; + + /* + * Our mission here is to fool the hardware to execute something + * from scratch as it has not seen the batch move (due to missing + * the TLB invalidate). + */ + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + mutex_lock(&i915->drm.struct_mutex); + ctx = live_context(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out_unlock; + } + + vm = ctx->vm; + if (!vm) + goto out_unlock; + + /* Create two pages; dummy we prefill the TLB, and intended */ + bbe = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(bbe)) { + err = PTR_ERR(bbe); + goto out_unlock; + } + + batch = i915_gem_object_pin_map(bbe, I915_MAP_WC); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto out_put_bbe; + } + memset32(batch, MI_BATCH_BUFFER_END, PAGE_SIZE / sizeof(u32)); + i915_gem_object_flush_map(bbe); + i915_gem_object_unpin_map(bbe); + + act = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(act)) { + err = PTR_ERR(act); + goto out_put_bbe; + } + + /* Track the execution of each request by writing into different slot */ + batch = i915_gem_object_pin_map(act, I915_MAP_WC); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto out_put_act; + } + for (i = 0; i < count; i++) { + u32 *cs = batch + i * 64 / sizeof(*cs); + u64 addr = (vm->total - PAGE_SIZE) + i * sizeof(u32); + + GEM_BUG_ON(INTEL_GEN(i915) < 6); + cs[0] = MI_STORE_DWORD_IMM_GEN4; + if (INTEL_GEN(i915) >= 8) { + cs[1] = lower_32_bits(addr); + cs[2] = upper_32_bits(addr); + cs[3] = i; + cs[4] = MI_NOOP; + cs[5] = MI_BATCH_BUFFER_START_GEN8; + } else { + cs[1] = 0; + cs[2] = lower_32_bits(addr); + cs[3] = i; + cs[4] = MI_NOOP; + cs[5] = MI_BATCH_BUFFER_START; + } + } + + out = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(out)) { + err = PTR_ERR(out); + goto out_put_batch; + } + i915_gem_object_set_cache_coherency(out, I915_CACHING_CACHED); + + vma = i915_vma_instance(out, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put_batch; + } + + err = i915_vma_pin(vma, 0, 0, + PIN_USER | + PIN_OFFSET_FIXED | + (vm->total - PAGE_SIZE)); + if (err) + goto out_put_out; + GEM_BUG_ON(vma->node.start != vm->total - PAGE_SIZE); + + result = i915_gem_object_pin_map(out, I915_MAP_WB); + if (IS_ERR(result)) { + err = PTR_ERR(result); + goto out_put_out; + } + + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + IGT_TIMEOUT(end_time); + unsigned long pass = 0; + + if (!intel_engine_can_store_dword(ce->engine)) + continue; + + while (!__igt_timeout(end_time, NULL)) { + struct i915_request *rq; + u64 offset; + + offset = random_offset(0, vm->total - PAGE_SIZE, + chunk_size, PAGE_SIZE); + + err = vm->allocate_va_range(vm, offset, chunk_size); + if (err) + goto end; + + memset32(result, STACK_MAGIC, PAGE_SIZE / sizeof(u32)); + + vma = i915_vma_instance(bbe, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto end; + } + + err = vma->ops->set_pages(vma); + if (err) + goto end; + + /* Prime the TLB with the dummy pages */ + for (i = 0; i < count; i++) { + vma->node.start = offset + i * PAGE_SIZE; + vm->insert_entries(vm, vma, I915_CACHE_NONE, 0); + + rq = submit_batch(ce, vma->node.start); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto end; + } + i915_request_put(rq); + } + + vma->ops->clear_pages(vma); + + err = context_sync(ce); + if (err) { + pr_err("%s: dummy setup timed out\n", + ce->engine->name); + goto end; + } + + vma = i915_vma_instance(act, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto end; + } + + err = vma->ops->set_pages(vma); + if (err) + goto end; + + /* Replace the TLB with target batches */ + for (i = 0; i < count; i++) { + struct i915_request *rq; + u32 *cs = batch + i * 64 / sizeof(*cs); + u64 addr; + + vma->node.start = offset + i * PAGE_SIZE; + vm->insert_entries(vm, vma, I915_CACHE_NONE, 0); + + addr = vma->node.start + i * 64; + cs[4] = MI_NOOP; + cs[6] = lower_32_bits(addr); + cs[7] = upper_32_bits(addr); + wmb(); + + rq = submit_batch(ce, addr); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto end; + } + + /* Wait until the context chain has started */ + if (i == 0) { + while (READ_ONCE(result[i]) && + !i915_request_completed(rq)) + cond_resched(); + } else { + end_spin(batch, i - 1); + } + + i915_request_put(rq); + } + end_spin(batch, count - 1); + + vma->ops->clear_pages(vma); + + err = context_sync(ce); + if (err) { + pr_err("%s: writes timed out\n", + ce->engine->name); + goto end; + } + + for (i = 0; i < count; i++) { + if (result[i] != i) { + pr_err("%s: Write lost on pass %lu, at offset %llx, index %d, found %x, expected %x\n", + ce->engine->name, pass, + offset, i, result[i], i); + err = -EINVAL; + goto end; + } + } + + vm->clear_range(vm, offset, chunk_size); + pass++; + } + } +end: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + i915_gem_context_unlock_engines(ctx); + i915_gem_object_unpin_map(out); +out_put_out: + i915_gem_object_put(out); +out_put_batch: + i915_gem_object_unpin_map(act); +out_put_act: + i915_gem_object_put(act); +out_put_bbe: + i915_gem_object_put(bbe); +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + mock_file_free(i915, file); + return err; +} + int i915_gem_gtt_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { @@ -1722,6 +2029,7 @@ int i915_gem_gtt_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_ggtt_pot), SUBTEST(igt_ggtt_fill), SUBTEST(igt_ggtt_page), + SUBTEST(igt_cs_tlb), }; GEM_BUG_ON(offset_in_page(i915->ggtt.vm.total)); From b01a3ef34816461c31711e5559663e453cb83aba Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 18 Sep 2019 16:56:25 -0700 Subject: [PATCH 186/331] drm/i915: Future-proof DDC pin mapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We generally assume future platforms will inherit the behavior of the most recent platforms, so update our DDC pin mapping defaults to match how ICP/TGP behave (i.e., pins starting from GMBUS_PIN_1_BXT for combo PHY's and pins starting from GMBUS_PIN_9_TC1_ICP for TC PHY's). MCC's non-standard handling of combo PHY C seems like a platform-specific quirk that is unlikely to be duplicated on future platforms, so continue handling it as a special case. Without this change, future platforms would default to gen4-style pin mapping which is almost certainly not what we'll want. Cc: José Roberto de Souza Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20190918235626.3750-1-matthew.d.roper@intel.com Reviewed-by: José Roberto de Souza --- drivers/gpu/drm/i915/display/intel_hdmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index 640254feef27d..f81541cff5dfd 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -3028,7 +3028,7 @@ static u8 intel_hdmi_ddc_pin(struct drm_i915_private *dev_priv, if (HAS_PCH_MCC(dev_priv)) ddc_pin = mcc_port_to_ddc_pin(dev_priv, port); - else if (HAS_PCH_TGP(dev_priv) || HAS_PCH_ICP(dev_priv)) + else if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) ddc_pin = icl_port_to_ddc_pin(dev_priv, port); else if (HAS_PCH_CNP(dev_priv)) ddc_pin = cnp_port_to_ddc_pin(dev_priv, port); From d09ad3e7af3ab230323c2c00e190f801feb3a343 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 18 Sep 2019 16:56:26 -0700 Subject: [PATCH 187/331] drm/i915: Unify ICP and MCC hotplug pin tables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The MCC hpd table is just a subset of the ICP table; we can eliminate it and use the ICP table everywhere. The extra pins in the table won't be a problem for MCC since we still supply an appropriate hotplug trigger mask anywhere the pin table is used. Cc: José Roberto de Souza Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20190918235626.3750-2-matthew.d.roper@intel.com Reviewed-by: José Roberto de Souza --- drivers/gpu/drm/i915/i915_irq.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index ae7228032d2c3..bc83f094065ae 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -157,12 +157,6 @@ static const u32 hpd_icp[HPD_NUM_PINS] = { [HPD_PORT_F] = SDE_TC_HOTPLUG_ICP(PORT_TC4), }; -static const u32 hpd_mcc[HPD_NUM_PINS] = { - [HPD_PORT_A] = SDE_DDI_HOTPLUG_ICP(PORT_A), - [HPD_PORT_B] = SDE_DDI_HOTPLUG_ICP(PORT_B), - [HPD_PORT_C] = SDE_TC_HOTPLUG_ICP(PORT_TC1), -}; - static const u32 hpd_tgp[HPD_NUM_PINS] = { [HPD_PORT_A] = SDE_DDI_HOTPLUG_ICP(PORT_A), [HPD_PORT_B] = SDE_DDI_HOTPLUG_ICP(PORT_B), @@ -2258,7 +2252,7 @@ static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) } else if (HAS_PCH_MCC(dev_priv)) { ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_TGP; tc_hotplug_trigger = 0; - pins = hpd_mcc; + pins = hpd_icp; } else { ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP; tc_hotplug_trigger = pch_iir & SDE_TC_MASK_ICP; @@ -3434,7 +3428,7 @@ static void mcc_hpd_irq_setup(struct drm_i915_private *dev_priv) icp_hpd_irq_setup(dev_priv, SDE_DDI_MASK_TGP, 0, TGP_DDI_HPD_ENABLE_MASK, 0, - hpd_mcc); + hpd_icp); } static void gen11_hpd_detection_setup(struct drm_i915_private *dev_priv) From 1c757497096f55d2e037462bfcdfed50877b4cf5 Mon Sep 17 00:00:00 2001 From: Radhakrishna Sripada Date: Mon, 9 Sep 2019 16:14:45 -0700 Subject: [PATCH 188/331] drm/i915/tgl: Implement Wa_1409142259 Disable CPS aware color pipe by setting chicken bit. BSpec: 52890 HSDES: 1409142259 v2: Move WA to ctx WA's(Daniele) Cc: Daniele Ceraolo Spurio Cc: Stuart Summers Cc: Matt Roper Signed-off-by: Radhakrishna Sripada Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20190909231445.23815-1-radhakrishna.sripada@intel.com Reviewed-by: Daniele Ceraolo Spurio --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 3 +++ drivers/gpu/drm/i915/i915_reg.h | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 41d0f786e06d7..ba65e50189780 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -567,6 +567,9 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { + /* Wa_1409142259 */ + WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3, + GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); } static void diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index bf37ecebc82f8..f8f52ae6cc6fe 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7672,6 +7672,7 @@ enum { #define GEN11_COMMON_SLICE_CHICKEN3 _MMIO(0x7304) #define GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC (1 << 11) + #define GEN12_DISABLE_CPS_AWARE_COLOR_PIPE (1 << 9) #define HIZ_CHICKEN _MMIO(0x7018) # define CHV_HZ_8X8_MODE_IN_1X (1 << 15) From 7f0cc34b5349cfb2867358737b3338cb2fcbc354 Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Thu, 25 Jul 2019 17:02:26 -0700 Subject: [PATCH 189/331] drm/i915/tgl: Implement Wa_1406941453 Enable Small PL for power benefit. Signed-off-by: Michel Thierry Signed-off-by: Lucas De Marchi Reviewed-by: Stuart Summers Reviewed-by: Radhakrishna Sripada Link: https://patchwork.freedesktop.org/patch/msgid/20190713010940.17711-18-lucas.demarchi@intel.com Link: https://patchwork.freedesktop.org/patch/msgid/20190726000226.26914-4-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 7 +++++++ drivers/gpu/drm/i915/i915_reg.h | 3 +++ 2 files changed, 10 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index ba65e50189780..25ae608463987 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1260,6 +1260,13 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { struct drm_i915_private *i915 = engine->i915; + if (IS_GEN(i915, 12)) { + /* Wa_1406941453:tgl */ + wa_masked_en(wal, + SAMPLER_MODE, + SAMPLER_ENABLE_SMALL_PL); + } + if (IS_GEN(i915, 11)) { /* This is not an Wa. Enable for better image quality */ wa_masked_en(wal, diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index f8f52ae6cc6fe..5e3a6178aff44 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -8965,6 +8965,9 @@ enum { #define GEN9_DG_MIRROR_FIX_ENABLE (1 << 5) #define GEN9_CCS_TLB_PREFETCH_ENABLE (1 << 3) +#define SAMPLER_MODE _MMIO(0xe18c) +#define SAMPLER_ENABLE_SMALL_PL (1 << 15) + #define GEN8_ROW_CHICKEN _MMIO(0xe4f0) #define FLOW_CONTROL_ENABLE (1 << 15) #define PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE (1 << 8) From bed34ef544f9ab37ab349c04cf4142282c4dcf5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 5 Sep 2019 16:50:43 +0300 Subject: [PATCH 190/331] drm/i915: Bump skl+ max plane width to 5k for linear/x-tiled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The officially validated plane width limit is 4k on skl+, however we already had people using 5k displays before we started to enforce the limit. Also it seems Windows allows 5k resolutions as well (though not sure if they do it with one plane or two). According to hw folks 5k should work with the possible exception of the following features: - Ytile (already limited to 4k) - FP16 (already limited to 4k) - render compression (already limited to 4k) - KVMR sprite and cursor (don't care) - horizontal panning (need to verify this) - pipe and plane scaling (need to verify this) So apart from last two items on that list we are already fine. We should really verify what happens with those last two items but I don't have a 5k display on hand atm so it'll have to wait. In the meantime let's just bump the limit back up to 5k since several users have already been using it without apparent issues. At least we'll be no worse off than we were prior to lowering the limits. Cc: stable@vger.kernel.org Cc: Sean Paul Cc: José Roberto de Souza Tested-by: Leho Kraav Fixes: 372b9ffb5799 ("drm/i915: Fix skl+ max plane width") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111501 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190905135044.2001-1-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst Reviewed-by: Sean Paul --- drivers/gpu/drm/i915/display/intel_display.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 1cc74844d3ea6..a61f4dfbf540b 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -3282,7 +3282,20 @@ static int skl_max_plane_width(const struct drm_framebuffer *fb, switch (fb->modifier) { case DRM_FORMAT_MOD_LINEAR: case I915_FORMAT_MOD_X_TILED: - return 4096; + /* + * Validated limit is 4k, but has 5k should + * work apart from the following features: + * - Ytile (already limited to 4k) + * - FP16 (already limited to 4k) + * - render compression (already limited to 4k) + * - KVMR sprite and cursor (don't care) + * - horizontal panning (TODO verify this) + * - pipe and plane scaling (TODO verify this) + */ + if (cpp == 8) + return 4096; + else + return 5120; case I915_FORMAT_MOD_Y_TILED_CCS: case I915_FORMAT_MOD_Yf_TILED_CCS: /* FIXME AUX plane? */ From 2d20411e25a3bf3d2914a2219f47ed48dc57aed5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 18 Sep 2019 18:07:07 +0300 Subject: [PATCH 191/331] drm/i915: Don't advertise modes that exceed the max plane size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Modern platforms allow the transcoders hdisplay/vdisplay to exceed the planes' max resolution. This has the nasty implication that modes on the connectors' mode list may not be usable when the user asks for a fullscreen plane. Seeing as that is the most common use case it seems prudent to filter out modes that don't allow for fullscreen planes to be enabled. Let's do that in the connetor .mode_valid() hook so that normally such modes are kept hidden but the user is still able to forcibly specify such a mode if they know they don't need fullscreen planes. This is in line with ealier policies regarding certain clock limits. The idea is to prevent the casual user from encountering a mode that would fail under typical conditions, but allow the expert user to force things if they so wish. Maybe in the future we should consider automagically using two planes when one can't cover the entire screen? Wouldn't be a great match for the current uapi with explicit planes though, but I guess no worse than using two pipes (which we apparently have to in the future anyway). Either that or we'd have to teach userspace to do it for us. v2: Fix icl+ max plane heigth (Manasi) Cc: Manasi Navare Cc: Leho Kraav Cc: Sean Paul Cc: José Roberto de Souza Reviewed-by: Maarten Lankhorst Reviewed-by: Manasi Navare Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190918150707.32420-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 36 ++++++++++++++++++++ drivers/gpu/drm/i915/display/intel_display.h | 4 +++ drivers/gpu/drm/i915/display/intel_dp.c | 2 +- drivers/gpu/drm/i915/display/intel_dsi.c | 3 +- drivers/gpu/drm/i915/display/intel_hdmi.c | 4 ++- 5 files changed, 46 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index a61f4dfbf540b..e0033d99f6e34 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -15753,6 +15753,7 @@ intel_mode_valid(struct drm_device *dev, DRM_MODE_FLAG_CLKDIV2)) return MODE_BAD; + /* Transcoder timing limits */ if (INTEL_GEN(dev_priv) >= 11) { hdisplay_max = 16384; vdisplay_max = 8192; @@ -15791,6 +15792,41 @@ intel_mode_valid(struct drm_device *dev, return MODE_OK; } +enum drm_mode_status +intel_mode_valid_max_plane_size(struct drm_i915_private *dev_priv, + const struct drm_display_mode *mode) +{ + int plane_width_max, plane_height_max; + + /* + * intel_mode_valid() should be + * sufficient on older platforms. + */ + if (INTEL_GEN(dev_priv) < 9) + return MODE_OK; + + /* + * Most people will probably want a fullscreen + * plane so let's not advertize modes that are + * too big for that. + */ + if (INTEL_GEN(dev_priv) >= 11) { + plane_width_max = 5120; + plane_height_max = 4320; + } else { + plane_width_max = 5120; + plane_height_max = 4096; + } + + if (mode->hdisplay > plane_width_max) + return MODE_H_ILLEGAL; + + if (mode->vdisplay > plane_height_max) + return MODE_V_ILLEGAL; + + return MODE_OK; +} + static const struct drm_mode_config_funcs intel_mode_funcs = { .fb_create = intel_user_framebuffer_create, .get_format_info = intel_get_format_info, diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index 66330fcb10d48..b1ae0e59c7153 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -32,6 +32,7 @@ enum link_m_n_set; struct dpll; struct drm_connector; struct drm_device; +struct drm_display_mode; struct drm_encoder; struct drm_file; struct drm_format_info; @@ -448,6 +449,9 @@ void lpt_disable_clkout_dp(struct drm_i915_private *dev_priv); u32 intel_plane_fb_max_stride(struct drm_i915_private *dev_priv, u32 pixel_format, u64 modifier); bool intel_plane_can_remap(const struct intel_plane_state *plane_state); +enum drm_mode_status +intel_mode_valid_max_plane_size(struct drm_i915_private *dev_priv, + const struct drm_display_mode *mode); enum phy intel_port_to_phy(struct drm_i915_private *i915, enum port port); void intel_plane_destroy(struct drm_plane *plane); diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index f06fbca5cb3ea..0c7755e915d91 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -562,7 +562,7 @@ intel_dp_mode_valid(struct drm_connector *connector, if (mode->flags & DRM_MODE_FLAG_DBLCLK) return MODE_H_ILLEGAL; - return MODE_OK; + return intel_mode_valid_max_plane_size(dev_priv, mode); } u32 intel_dp_pack_aux(const u8 *src, int src_bytes) diff --git a/drivers/gpu/drm/i915/display/intel_dsi.c b/drivers/gpu/drm/i915/display/intel_dsi.c index 5fec02aceaed9..a2a937109a5ad 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi.c +++ b/drivers/gpu/drm/i915/display/intel_dsi.c @@ -55,6 +55,7 @@ int intel_dsi_get_modes(struct drm_connector *connector) enum drm_mode_status intel_dsi_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { + struct drm_i915_private *dev_priv = to_i915(connector->dev); struct intel_connector *intel_connector = to_intel_connector(connector); const struct drm_display_mode *fixed_mode = intel_connector->panel.fixed_mode; int max_dotclk = to_i915(connector->dev)->max_dotclk_freq; @@ -73,7 +74,7 @@ enum drm_mode_status intel_dsi_mode_valid(struct drm_connector *connector, return MODE_CLOCK_HIGH; } - return MODE_OK; + return intel_mode_valid_max_plane_size(dev_priv, mode); } struct intel_dsi_host *intel_dsi_host_init(struct intel_dsi *intel_dsi, diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index f81541cff5dfd..04a8718bd2d71 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -2188,8 +2188,10 @@ intel_hdmi_mode_valid(struct drm_connector *connector, status = hdmi_port_clock_valid(hdmi, clock * 5 / 4, true, force_dvi); } + if (status != MODE_OK) + return status; - return status; + return intel_mode_valid_max_plane_size(dev_priv, mode); } static bool hdmi_deep_color_possible(const struct intel_crtc_state *crtc_state, From c45e788d95b470e9f68fabe1f3cb44beb5dd7840 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 19 Sep 2019 16:18:11 +0100 Subject: [PATCH 192/331] drm/i915/tgl: Suspend pre-parser across GTT invalidations Before we execute a batch, we must first issue any and all TLB invalidations so that batch picks up the new page table entries. Tigerlake's preparser is weakening our post-sync CS_STALL inside the invalidate pipe-control and allowing the loading of the batch buffer before we have setup its page table (and so it loads the wrong page and executes indefinitely). The igt_cs_tlb indicates that this issue can only be observed on rcs, even though the preparser is common to all engines. Alternatively, we could do TLB shootdown via mmio on updating the GTT. By inserting the pre-parser disable inside EMIT_INVALIDATE, we will also accidentally fixup execution that writes into subsequent batches, such as gem_exec_whisper and even relocations performed on the GPU. We should be careful not to allow this disable to become baked into the uABI! The issue is that if userspace relies on our disabling of the HW optimisation, when we are ready to enable that optimisation, userspace will then be broken... Testcase: igt/i915_selftests/live_gtt/igt_cs_tlb Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111753 Signed-off-by: Chris Wilson Cc: Daniele Ceraolo Spurio Cc: Mika Kuoppala Acked-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190919151811.9526-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 74 ++++++++++++++++++++++++++++- 1 file changed, 73 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index a99166a2d2eb9..49ed9230a2cb1 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -2807,6 +2807,78 @@ static int gen11_emit_flush_render(struct i915_request *request, return 0; } +static u32 preparser_disable(bool state) +{ + return MI_ARB_CHECK | 1 << 8 | state; +} + +static int gen12_emit_flush_render(struct i915_request *request, + u32 mode) +{ + const u32 scratch_addr = + intel_gt_scratch_offset(request->engine->gt, + INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH); + + if (mode & EMIT_FLUSH) { + u32 flags = 0; + u32 *cs; + + flags |= PIPE_CONTROL_TILE_CACHE_FLUSH; + flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; + flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; + flags |= PIPE_CONTROL_FLUSH_ENABLE; + + flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + flags |= PIPE_CONTROL_QW_WRITE; + + flags |= PIPE_CONTROL_CS_STALL; + + cs = intel_ring_begin(request, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + cs = gen8_emit_pipe_control(cs, flags, scratch_addr); + intel_ring_advance(request, cs); + } + + if (mode & EMIT_INVALIDATE) { + u32 flags = 0; + u32 *cs; + + flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_TLB_INVALIDATE; + flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; + + flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + flags |= PIPE_CONTROL_QW_WRITE; + + flags |= PIPE_CONTROL_CS_STALL; + + cs = intel_ring_begin(request, 8); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* + * Prevent the pre-parser from skipping past the TLB + * invalidate and loading a stale page for the batch + * buffer / request payload. + */ + *cs++ = preparser_disable(true); + + cs = gen8_emit_pipe_control(cs, flags, scratch_addr); + + *cs++ = preparser_disable(false); + intel_ring_advance(request, cs); + } + + return 0; +} + /* * Reserve space for 2 NOOPs at the end of each request to be * used as a workaround for not being allowed to do lite @@ -3072,7 +3144,7 @@ static void rcs_submission_override(struct intel_engine_cs *engine) { switch (INTEL_GEN(engine->i915)) { case 12: - engine->emit_flush = gen11_emit_flush_render; + engine->emit_flush = gen12_emit_flush_render; engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs; break; case 11: From d19d71fc2b15bf30ff3e56932eae23ff096c1396 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 19 Sep 2019 12:19:10 +0100 Subject: [PATCH 193/331] drm/i915: Mark i915_request.timeline as a volatile, rcu pointer The request->timeline is only valid until the request is retired (i.e. before it is completed). Upon retiring the request, the context may be unpinned and freed, and along with it the timeline may be freed. We therefore need to be very careful when chasing rq->timeline that the pointer does not disappear beneath us. The vast majority of users are in a protected context, either during request construction or retirement, where the timeline->mutex is held and the timeline cannot disappear. It is those few off the beaten path (where we access a second timeline) that need extra scrutiny -- to be added in the next patch after first adding the warnings about dangerous access. One complication, where we cannot use the timeline->mutex itself, is during request submission onto hardware (under spinlocks). Here, we want to check on the timeline to finalize the breadcrumb, and so we need to impose a second rule to ensure that the request->timeline is indeed valid. As we are submitting the request, it's context and timeline must be pinned, as it will be used by the hardware. Since it is pinned, we know the request->timeline must still be valid, and we cannot submit the idle barrier until after we release the engine->active.lock, ergo while submitting and holding that spinlock, a second thread cannot release the timeline. v2: Don't be lazy inside selftests; hold the timeline->mutex for as long as we need it, and tidy up acquiring the timeline with a bit of refactoring (i915_active_add_request) Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190919111912.21631-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/display/intel_overlay.c | 2 +- .../gpu/drm/i915/gem/i915_gem_client_blt.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/gt/intel_context.c | 4 +- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 57 ++++++++++++++++--- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 2 +- drivers/gpu/drm/i915/gt/intel_engine_pool.h | 2 +- drivers/gpu/drm/i915/gt/intel_lrc.c | 17 +++--- drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 27 +++++---- drivers/gpu/drm/i915/gt/intel_timeline.c | 6 +- .../gpu/drm/i915/gt/intel_timeline_types.h | 1 + drivers/gpu/drm/i915/gt/selftest_context.c | 18 ++++-- drivers/gpu/drm/i915/gt/selftest_lrc.c | 2 +- drivers/gpu/drm/i915/i915_active.c | 2 +- drivers/gpu/drm/i915/i915_active.h | 6 ++ drivers/gpu/drm/i915/i915_request.c | 28 +++++---- drivers/gpu/drm/i915/i915_request.h | 22 ++++++- drivers/gpu/drm/i915/i915_vma.c | 6 +- drivers/gpu/drm/i915/selftests/i915_active.c | 2 +- drivers/gpu/drm/i915/selftests/igt_spinner.c | 2 +- 20 files changed, 147 insertions(+), 63 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index 29edfc3437163..5efef9babadb1 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -230,7 +230,7 @@ alloc_request(struct intel_overlay *overlay, void (*fn)(struct intel_overlay *)) if (IS_ERR(rq)) return rq; - err = i915_active_ref(&overlay->last_flip, rq->timeline, rq); + err = i915_active_add_request(&overlay->last_flip, rq); if (err) { i915_request_add(rq); return ERR_PTR(err); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c index f999206527513..7f61a80241334 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c @@ -211,7 +211,7 @@ static void clear_pages_worker(struct work_struct *work) * keep track of the GPU activity within this vma/request, and * propagate the signal from the request to w->dma. */ - err = i915_active_ref(&vma->active, rq->timeline, rq); + err = i915_active_add_request(&vma->active, rq); if (err) goto out_request; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index f1c0e5d958f3b..4a34c4f62065d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -910,7 +910,7 @@ static int context_barrier_task(struct i915_gem_context *ctx, if (emit) err = emit(rq, data); if (err == 0) - err = i915_active_ref(&cb->base, rq->timeline, rq); + err = i915_active_add_request(&cb->base, rq); i915_request_add(rq); if (err) diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index c0495811f4932..26cb838c272c6 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -298,7 +298,7 @@ int intel_context_prepare_remote_request(struct intel_context *ce, /* Only suitable for use in remotely modifying this context */ GEM_BUG_ON(rq->hw_context == ce); - if (rq->timeline != tl) { /* beware timeline sharing */ + if (rcu_access_pointer(rq->timeline) != tl) { /* timeline sharing! */ err = mutex_lock_interruptible_nested(&tl->mutex, SINGLE_DEPTH_NESTING); if (err) @@ -319,7 +319,7 @@ int intel_context_prepare_remote_request(struct intel_context *ce, * words transfer the pinned ce object to tracked active request. */ GEM_BUG_ON(i915_active_is_idle(&ce->active)); - return i915_active_ref(&ce->active, rq->timeline, rq); + return i915_active_add_request(&ce->active, rq); } struct i915_request *intel_context_create_request(struct intel_context *ce) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 3c176b0f4b453..f451d5076bdef 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -680,6 +680,8 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine) engine->status_page.vma)) goto out_frame; + mutex_lock(&frame->timeline.mutex); + frame->ring.vaddr = frame->cs; frame->ring.size = sizeof(frame->cs); frame->ring.effective_size = frame->ring.size; @@ -688,18 +690,22 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine) frame->rq.i915 = engine->i915; frame->rq.engine = engine; frame->rq.ring = &frame->ring; - frame->rq.timeline = &frame->timeline; + rcu_assign_pointer(frame->rq.timeline, &frame->timeline); dw = intel_timeline_pin(&frame->timeline); if (dw < 0) goto out_timeline; + spin_lock_irq(&engine->active.lock); dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs; + spin_unlock_irq(&engine->active.lock); + GEM_BUG_ON(dw & 1); /* RING_TAIL must be qword aligned */ intel_timeline_unpin(&frame->timeline); out_timeline: + mutex_unlock(&frame->timeline.mutex); intel_timeline_fini(&frame->timeline); out_frame: kfree(frame); @@ -1196,6 +1202,27 @@ static void hexdump(struct drm_printer *m, const void *buf, size_t len) } } +static struct intel_timeline *get_timeline(struct i915_request *rq) +{ + struct intel_timeline *tl; + + /* + * Even though we are holding the engine->active.lock here, there + * is no control over the submission queue per-se and we are + * inspecting the active state at a random point in time, with an + * unknown queue. Play safe and make sure the timeline remains valid. + * (Only being used for pretty printing, one extra kref shouldn't + * cause a camel stampede!) + */ + rcu_read_lock(); + tl = rcu_dereference(rq->timeline); + if (!kref_get_unless_zero(&tl->kref)) + tl = NULL; + rcu_read_unlock(); + + return tl; +} + static void intel_engine_print_registers(struct intel_engine_cs *engine, struct drm_printer *m) { @@ -1290,27 +1317,37 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, int len; len = snprintf(hdr, sizeof(hdr), - "\t\tActive[%d: ", + "\t\tActive[%d]: ", (int)(port - execlists->active)); - if (!i915_request_signaled(rq)) + if (!i915_request_signaled(rq)) { + struct intel_timeline *tl = get_timeline(rq); + len += snprintf(hdr + len, sizeof(hdr) - len, "ring:{start:%08x, hwsp:%08x, seqno:%08x}, ", i915_ggtt_offset(rq->ring->vma), - rq->timeline->hwsp_offset, + tl ? tl->hwsp_offset : 0, hwsp_seqno(rq)); + + if (tl) + intel_timeline_put(tl); + } snprintf(hdr + len, sizeof(hdr) - len, "rq: "); print_request(m, rq, hdr); } for (port = execlists->pending; (rq = *port); port++) { + struct intel_timeline *tl = get_timeline(rq); char hdr[80]; snprintf(hdr, sizeof(hdr), "\t\tPending[%d] ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ", (int)(port - execlists->pending), i915_ggtt_offset(rq->ring->vma), - rq->timeline->hwsp_offset, + tl ? tl->hwsp_offset : 0, hwsp_seqno(rq)); print_request(m, rq, hdr); + + if (tl) + intel_timeline_put(tl); } spin_unlock_irqrestore(&engine->active.lock, flags); } else if (INTEL_GEN(dev_priv) > 6) { @@ -1388,6 +1425,8 @@ void intel_engine_dump(struct intel_engine_cs *engine, spin_lock_irqsave(&engine->active.lock, flags); rq = intel_engine_find_active_request(engine); if (rq) { + struct intel_timeline *tl = get_timeline(rq); + print_request(m, rq, "\t\tactive "); drm_printf(m, "\t\tring->start: 0x%08x\n", @@ -1400,8 +1439,12 @@ void intel_engine_dump(struct intel_engine_cs *engine, rq->ring->emit); drm_printf(m, "\t\tring->space: 0x%08x\n", rq->ring->space); - drm_printf(m, "\t\tring->hwsp: 0x%08x\n", - rq->timeline->hwsp_offset); + + if (tl) { + drm_printf(m, "\t\tring->hwsp: 0x%08x\n", + tl->hwsp_offset); + intel_timeline_put(tl); + } print_request_ring(m, rq); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 65b5ca74b3947..ce61c83d68e97 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -103,7 +103,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) /* Context switch failed, hope for the best! Maybe reset? */ goto out_unlock; - intel_timeline_enter(rq->timeline); + intel_timeline_enter(i915_request_timeline(rq)); /* Check again on the next retirement. */ engine->wakeref_serial = engine->serial + 1; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.h b/drivers/gpu/drm/i915/gt/intel_engine_pool.h index 7e2123b335949..1bd89cadc3b79 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pool.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.h @@ -18,7 +18,7 @@ static inline int intel_engine_pool_mark_active(struct intel_engine_pool_node *node, struct i915_request *rq) { - return i915_active_ref(&node->active, rq->timeline, rq); + return i915_active_add_request(&node->active, rq); } static inline void diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 49ed9230a2cb1..1a2b71157f08d 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1825,7 +1825,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq) { u32 *cs; - GEM_BUG_ON(!rq->timeline->has_initial_breadcrumb); + GEM_BUG_ON(!i915_request_timeline(rq)->has_initial_breadcrumb); cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) @@ -1841,7 +1841,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq) *cs++ = MI_NOOP; *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *cs++ = rq->timeline->hwsp_offset; + *cs++ = i915_request_timeline(rq)->hwsp_offset; *cs++ = 0; *cs++ = rq->fence.seqno - 1; @@ -2324,7 +2324,8 @@ static void reset_csb_pointers(struct intel_engine_cs *engine) static struct i915_request *active_request(struct i915_request *rq) { - const struct list_head * const list = &rq->timeline->requests; + const struct list_head * const list = + &i915_request_active_timeline(rq)->requests; const struct intel_context * const ce = rq->hw_context; struct i915_request *active = NULL; @@ -2927,7 +2928,7 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) { cs = gen8_emit_ggtt_write(cs, request->fence.seqno, - request->timeline->hwsp_offset, + i915_request_active_timeline(request)->hwsp_offset, 0); return gen8_emit_fini_breadcrumb_footer(request, cs); @@ -2944,7 +2945,7 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */ cs = gen8_emit_ggtt_write_rcs(cs, request->fence.seqno, - request->timeline->hwsp_offset, + i915_request_active_timeline(request)->hwsp_offset, PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_CS_STALL); @@ -2956,7 +2957,7 @@ gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) { cs = gen8_emit_ggtt_write_rcs(cs, request->fence.seqno, - request->timeline->hwsp_offset, + i915_request_active_timeline(request)->hwsp_offset, PIPE_CONTROL_CS_STALL | PIPE_CONTROL_TILE_CACHE_FLUSH | PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | @@ -3020,7 +3021,7 @@ static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) { cs = gen8_emit_ggtt_write(cs, request->fence.seqno, - request->timeline->hwsp_offset, + i915_request_active_timeline(request)->hwsp_offset, 0); return gen12_emit_fini_breadcrumb_footer(request, cs); @@ -3031,7 +3032,7 @@ gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) { cs = gen8_emit_ggtt_write_rcs(cs, request->fence.seqno, - request->timeline->hwsp_offset, + i915_request_active_timeline(request)->hwsp_offset, PIPE_CONTROL_CS_STALL | PIPE_CONTROL_TILE_CACHE_FLUSH | PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index a25b84b12ef1d..0747b8c9f7683 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -322,7 +322,8 @@ static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) PIPE_CONTROL_DC_FLUSH_ENABLE | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL); - *cs++ = rq->timeline->hwsp_offset | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = i915_request_active_timeline(rq)->hwsp_offset | + PIPE_CONTROL_GLOBAL_GTT; *cs++ = rq->fence.seqno; *cs++ = MI_USER_INTERRUPT; @@ -425,7 +426,7 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL); - *cs++ = rq->timeline->hwsp_offset; + *cs++ = i915_request_active_timeline(rq)->hwsp_offset; *cs++ = rq->fence.seqno; *cs++ = MI_USER_INTERRUPT; @@ -439,8 +440,8 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) { - GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); - GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; @@ -459,8 +460,8 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) { int i; - GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); - GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; @@ -938,8 +939,8 @@ static void i9xx_submit_request(struct i915_request *request) static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) { - GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); - GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); *cs++ = MI_FLUSH; @@ -961,8 +962,8 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) { int i; - GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); - GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); *cs++ = MI_FLUSH; @@ -1815,7 +1816,7 @@ static int ring_request_alloc(struct i915_request *request) int ret; GEM_BUG_ON(!intel_context_is_pinned(request->hw_context)); - GEM_BUG_ON(request->timeline->has_initial_breadcrumb); + GEM_BUG_ON(i915_request_timeline(request)->has_initial_breadcrumb); /* * Flush enough space to reduce the likelihood of waiting after @@ -1926,7 +1927,9 @@ u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords) */ GEM_BUG_ON(!rq->reserved_space); - ret = wait_for_space(ring, rq->timeline, total_bytes); + ret = wait_for_space(ring, + i915_request_timeline(rq), + total_bytes); if (unlikely(ret)) return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 9cb01d9828f1d..115a24d4a20a4 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -493,7 +493,7 @@ int intel_timeline_get_seqno(struct intel_timeline *tl, static int cacheline_ref(struct intel_timeline_cacheline *cl, struct i915_request *rq) { - return i915_active_ref(&cl->active, rq->timeline, rq); + return i915_active_add_request(&cl->active, rq); } int intel_timeline_read_hwsp(struct i915_request *from, @@ -504,7 +504,7 @@ int intel_timeline_read_hwsp(struct i915_request *from, struct intel_timeline *tl = from->timeline; int err; - GEM_BUG_ON(to->timeline == tl); + GEM_BUG_ON(rcu_access_pointer(to->timeline) == tl); mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING); err = i915_request_completed(from); @@ -541,7 +541,7 @@ void __intel_timeline_free(struct kref *kref) container_of(kref, typeof(*timeline), kref); intel_timeline_fini(timeline); - kfree(timeline); + kfree_rcu(timeline, rcu); } static void timelines_fini(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h index 2b1baf2fcc8e4..c668c4c50e757 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h @@ -80,6 +80,7 @@ struct intel_timeline { struct intel_gt *gt; struct kref kref; + struct rcu_head rcu; }; #endif /* __I915_TIMELINE_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c index 9d1ea26c7a2da..4ce1e25433d22 100644 --- a/drivers/gpu/drm/i915/gt/selftest_context.c +++ b/drivers/gpu/drm/i915/gt/selftest_context.c @@ -14,22 +14,28 @@ static int request_sync(struct i915_request *rq) { + struct intel_timeline *tl = i915_request_timeline(rq); long timeout; int err = 0; + intel_timeline_get(tl); i915_request_get(rq); - i915_request_add(rq); + /* Opencode i915_request_add() so we can keep the timeline locked. */ + __i915_request_commit(rq); + __i915_request_queue(rq, NULL); + timeout = i915_request_wait(rq, 0, HZ / 10); - if (timeout < 0) { + if (timeout < 0) err = timeout; - } else { - mutex_lock(&rq->timeline->mutex); + else i915_request_retire_upto(rq); - mutex_unlock(&rq->timeline->mutex); - } + + lockdep_unpin_lock(&tl->mutex, rq->cookie); + mutex_unlock(&tl->mutex); i915_request_put(rq); + intel_timeline_put(tl); return err; } diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 26d05bd1bdc8b..93a871bfd95d1 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -1089,7 +1089,7 @@ static int live_suppress_wait_preempt(void *arg) } /* Disable NEWCLIENT promotion */ - __i915_active_request_set(&rq[i]->timeline->last_request, + __i915_active_request_set(&i915_request_timeline(rq[i])->last_request, dummy); i915_request_add(rq[i]); } diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 6a447f1d01103..d5aac6ff803a3 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -695,7 +695,7 @@ void i915_request_add_active_barriers(struct i915_request *rq) struct llist_node *node, *next; GEM_BUG_ON(intel_engine_is_virtual(engine)); - GEM_BUG_ON(rq->timeline != engine->kernel_context->timeline); + GEM_BUG_ON(i915_request_timeline(rq) != engine->kernel_context->timeline); /* * Attach the list of proto-fences to the in-flight request such diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h index f95058f99057b..949c6835335bc 100644 --- a/drivers/gpu/drm/i915/i915_active.h +++ b/drivers/gpu/drm/i915/i915_active.h @@ -373,6 +373,12 @@ int i915_active_ref(struct i915_active *ref, struct intel_timeline *tl, struct i915_request *rq); +static inline int +i915_active_add_request(struct i915_active *ref, struct i915_request *rq) +{ + return i915_active_ref(ref, i915_request_timeline(rq), rq); +} + int i915_active_wait(struct i915_active *ref); int i915_request_await_active(struct i915_request *rq, diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 606acde47fa04..fb6f21c41934b 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -220,7 +220,6 @@ static bool i915_request_retire(struct i915_request *rq) { struct i915_active_request *active, *next; - lockdep_assert_held(&rq->timeline->mutex); if (!i915_request_completed(rq)) return false; @@ -241,7 +240,8 @@ static bool i915_request_retire(struct i915_request *rq) * Note this requires that we are always called in request * completion order. */ - GEM_BUG_ON(!list_is_first(&rq->link, &rq->timeline->requests)); + GEM_BUG_ON(!list_is_first(&rq->link, + &i915_request_timeline(rq)->requests)); rq->ring->head = rq->postfix; /* @@ -317,7 +317,7 @@ static bool i915_request_retire(struct i915_request *rq) void i915_request_retire_upto(struct i915_request *rq) { - struct intel_timeline * const tl = rq->timeline; + struct intel_timeline * const tl = i915_request_timeline(rq); struct i915_request *tmp; GEM_TRACE("%s fence %llx:%lld, current %d\n", @@ -325,7 +325,6 @@ void i915_request_retire_upto(struct i915_request *rq) rq->fence.context, rq->fence.seqno, hwsp_seqno(rq)); - lockdep_assert_held(&tl->mutex); GEM_BUG_ON(!i915_request_completed(rq)); do { @@ -661,9 +660,11 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) rq->gem_context = ce->gem_context; rq->engine = ce->engine; rq->ring = ce->ring; - rq->timeline = tl; + + rcu_assign_pointer(rq->timeline, tl); rq->hwsp_seqno = tl->hwsp_seqno; rq->hwsp_cacheline = tl->hwsp_cacheline; + rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */ spin_lock_init(&rq->lock); @@ -771,7 +772,8 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal) return 0; signal = list_prev_entry(signal, link); - if (intel_timeline_sync_is_later(rq->timeline, &signal->fence)) + if (intel_timeline_sync_is_later(i915_request_timeline(rq), + &signal->fence)) return 0; return i915_sw_fence_await_dma_fence(&rq->submit, @@ -947,7 +949,8 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) /* Squash repeated waits to the same timelines */ if (fence->context && - intel_timeline_sync_is_later(rq->timeline, fence)) + intel_timeline_sync_is_later(i915_request_timeline(rq), + fence)) continue; if (dma_fence_is_i915(fence)) @@ -961,7 +964,8 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) /* Record the latest fence used against each timeline */ if (fence->context) - intel_timeline_sync_set(rq->timeline, fence); + intel_timeline_sync_set(i915_request_timeline(rq), + fence); } while (--nchild); return 0; @@ -1103,7 +1107,7 @@ void i915_request_skip(struct i915_request *rq, int error) static struct i915_request * __i915_request_add_to_timeline(struct i915_request *rq) { - struct intel_timeline *timeline = rq->timeline; + struct intel_timeline *timeline = i915_request_timeline(rq); struct i915_request *prev; /* @@ -1216,7 +1220,7 @@ void __i915_request_queue(struct i915_request *rq, void i915_request_add(struct i915_request *rq) { struct i915_sched_attr attr = rq->gem_context->sched; - struct intel_timeline * const tl = rq->timeline; + struct intel_timeline * const tl = i915_request_timeline(rq); struct i915_request *prev; lockdep_assert_held(&tl->mutex); @@ -1271,7 +1275,9 @@ void i915_request_add(struct i915_request *rq) * work on behalf of others -- but instead we should benefit from * improved resource management. (Well, that's the theory at least.) */ - if (prev && i915_request_completed(prev) && prev->timeline == tl) + if (prev && + i915_request_completed(prev) && + rcu_access_pointer(prev->timeline) == tl) i915_request_retire_upto(prev); mutex_unlock(&tl->mutex); diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 8ac6e1226a561..b18f49528dedd 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -113,7 +113,7 @@ struct i915_request { struct intel_engine_cs *engine; struct intel_context *hw_context; struct intel_ring *ring; - struct intel_timeline *timeline; + struct intel_timeline __rcu *timeline; struct list_head signal_link; /* @@ -442,6 +442,26 @@ static inline bool i915_request_has_nopreempt(const struct i915_request *rq) return unlikely(rq->flags & I915_REQUEST_NOPREEMPT); } +static inline struct intel_timeline * +i915_request_timeline(struct i915_request *rq) +{ + /* Valid only while the request is being constructed (or retired). */ + return rcu_dereference_protected(rq->timeline, + lockdep_is_held(&rcu_access_pointer(rq->timeline)->mutex)); +} + +static inline struct intel_timeline * +i915_request_active_timeline(struct i915_request *rq) +{ + /* + * When in use during submission, we are protected by a guarantee that + * the context/timeline is pinned and must remain pinned until after + * this submission. + */ + return rcu_dereference_protected(rq->timeline, + lockdep_is_held(&rq->engine->active.lock)); +} + bool i915_retire_requests(struct drm_i915_private *i915); #endif /* I915_REQUEST_H */ diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 411047d6a909f..9d5b0f87c2100 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -900,15 +900,13 @@ int i915_vma_move_to_active(struct i915_vma *vma, * add the active reference first and queue for it to be dropped * *last*. */ - err = i915_active_ref(&vma->active, rq->timeline, rq); + err = i915_active_add_request(&vma->active, rq); if (unlikely(err)) return err; if (flags & EXEC_OBJECT_WRITE) { if (intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CS)) - i915_active_ref(&obj->frontbuffer->write, - rq->timeline, - rq); + i915_active_add_request(&obj->frontbuffer->write, rq); dma_resv_add_excl_fence(vma->resv, &rq->fence); obj->write_domain = I915_GEM_DOMAIN_RENDER; diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c index 77d844ac8b713..afecfa081ff4f 100644 --- a/drivers/gpu/drm/i915/selftests/i915_active.c +++ b/drivers/gpu/drm/i915/selftests/i915_active.c @@ -110,7 +110,7 @@ __live_active_setup(struct drm_i915_private *i915) submit, GFP_KERNEL); if (err >= 0) - err = i915_active_ref(&active->base, rq->timeline, rq); + err = i915_active_add_request(&active->base, rq); i915_request_add(rq); if (err) { pr_err("Failed to track active ref!\n"); diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c index 11f04ad48e688..ee8450b871da9 100644 --- a/drivers/gpu/drm/i915/selftests/igt_spinner.c +++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c @@ -147,7 +147,7 @@ igt_spinner_create_request(struct igt_spinner *spin, intel_gt_chipset_flush(engine->gt); if (engine->emit_init_breadcrumb && - rq->timeline->has_initial_breadcrumb) { + i915_request_timeline(rq)->has_initial_breadcrumb) { err = engine->emit_init_breadcrumb(rq); if (err) goto cancel_rq; From 6a79d848403ded2245fbe4aaf50b33118d12daf0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 19 Sep 2019 12:19:11 +0100 Subject: [PATCH 194/331] drm/i915: Lock signaler timeline while navigating As we need to take a walk back along the signaler timeline to find the fence before upon which we want to wait, we need to lock that timeline to prevent it being modified as we walk. Similarly, we also need to acquire a reference to the earlier fence while it still exists! Though we lack the correct locking today, we are saved by the overarching struct_mutex -- but that protection is being removed. v2: Tvrtko made me realise I was being lax and using annotations to ignore the AB-BA deadlock from the timeline overlap. As it would be possible to construct a second request that was using a semaphore from the same timeline as ourselves, we could quite easily end up in a situation where we deadlocked in our mutex waits. Avoid that by using a trylock and falling back to a normal dma-fence await if contended. v3: Eek, the signal->timeline is volatile and must be carefully dereferenced to ensure it is valid. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190919111912.21631-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 68 +++++++++++++++++++---------- 1 file changed, 46 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index fb6f21c41934b..9bd8538b19075 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -768,17 +768,43 @@ i915_request_create(struct intel_context *ce) static int i915_request_await_start(struct i915_request *rq, struct i915_request *signal) { - if (list_is_first(&signal->link, &signal->timeline->requests)) - return 0; + struct intel_timeline *tl; + struct dma_fence *fence; + int err; + + GEM_BUG_ON(i915_request_timeline(rq) == + rcu_access_pointer(signal->timeline)); - signal = list_prev_entry(signal, link); - if (intel_timeline_sync_is_later(i915_request_timeline(rq), - &signal->fence)) + rcu_read_lock(); + tl = rcu_dereference(signal->timeline); + if (i915_request_started(signal) || !kref_get_unless_zero(&tl->kref)) + tl = NULL; + rcu_read_unlock(); + if (!tl) /* already started or maybe even completed */ return 0; - return i915_sw_fence_await_dma_fence(&rq->submit, - &signal->fence, 0, - I915_FENCE_GFP); + fence = ERR_PTR(-EBUSY); + if (mutex_trylock(&tl->mutex)) { + fence = NULL; + if (!i915_request_started(signal) && + !list_is_first(&signal->link, &tl->requests)) { + signal = list_prev_entry(signal, link); + fence = dma_fence_get(&signal->fence); + } + mutex_unlock(&tl->mutex); + } + intel_timeline_put(tl); + if (IS_ERR_OR_NULL(fence)) + return PTR_ERR_OR_ZERO(fence); + + err = 0; + if (intel_timeline_sync_is_later(i915_request_timeline(rq), fence)) + err = i915_sw_fence_await_dma_fence(&rq->submit, + fence, 0, + I915_FENCE_GFP); + dma_fence_put(fence); + + return err; } static intel_engine_mask_t @@ -808,30 +834,23 @@ emit_semaphore_wait(struct i915_request *to, u32 hwsp_offset; int len; u32 *cs; - int err; - GEM_BUG_ON(!from->timeline->has_initial_breadcrumb); GEM_BUG_ON(INTEL_GEN(to->i915) < 8); /* Just emit the first semaphore we see as request space is limited. */ if (already_busywaiting(to) & from->engine->mask) - return i915_sw_fence_await_dma_fence(&to->submit, - &from->fence, 0, - I915_FENCE_GFP); + goto await_fence; - err = i915_request_await_start(to, from); - if (err < 0) - return err; + if (i915_request_await_start(to, from) < 0) + goto await_fence; /* Only submit our spinner after the signaler is running! */ - err = __i915_request_await_execution(to, from, NULL, gfp); - if (err) - return err; + if (__i915_request_await_execution(to, from, NULL, gfp)) + goto await_fence; /* We need to pin the signaler's HWSP until we are finished reading. */ - err = intel_timeline_read_hwsp(from, to, &hwsp_offset); - if (err) - return err; + if (intel_timeline_read_hwsp(from, to, &hwsp_offset)) + goto await_fence; len = 4; if (has_token) @@ -866,6 +885,11 @@ emit_semaphore_wait(struct i915_request *to, to->sched.semaphores |= from->engine->mask; to->sched.flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN; return 0; + +await_fence: + return i915_sw_fence_await_dma_fence(&to->submit, + &from->fence, 0, + I915_FENCE_GFP); } static int From 9eee0dd7d3a4e2620257f1532cba8f013aee954f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 19 Sep 2019 12:19:12 +0100 Subject: [PATCH 195/331] drm/i915: Protect timeline->hwsp dereferencing As not only is the signal->timeline volatile, so will be acquiring the timeline's HWSP. We must first carefully acquire the timeline from the signaling request and then lock the timeline. With the removal of the struct_mutex serialisation of request construction, we can have multiple timelines active at once, and so we must avoid using the nested mutex lock as it is quite possible for both timelines to be establishing semaphores on the other and so deadlock. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190919111912.21631-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_timeline.c | 32 ++++++++++++++++++------ 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 115a24d4a20a4..9d436e14ea8df 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -500,17 +500,32 @@ int intel_timeline_read_hwsp(struct i915_request *from, struct i915_request *to, u32 *hwsp) { - struct intel_timeline_cacheline *cl = from->hwsp_cacheline; - struct intel_timeline *tl = from->timeline; + struct intel_timeline *tl; int err; + rcu_read_lock(); + tl = rcu_dereference(from->timeline); + if (i915_request_completed(from) || !kref_get_unless_zero(&tl->kref)) + tl = NULL; + rcu_read_unlock(); + if (!tl) /* already completed */ + return 1; + GEM_BUG_ON(rcu_access_pointer(to->timeline) == tl); - mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING); - err = i915_request_completed(from); - if (!err) + err = -EBUSY; + if (mutex_trylock(&tl->mutex)) { + struct intel_timeline_cacheline *cl = from->hwsp_cacheline; + + if (i915_request_completed(from)) { + err = 1; + goto unlock; + } + err = cacheline_ref(cl, to); - if (!err) { + if (err) + goto unlock; + if (likely(cl == tl->hwsp_cacheline)) { *hwsp = tl->hwsp_offset; } else { /* across a seqno wrap, recover the original offset */ @@ -518,8 +533,11 @@ int intel_timeline_read_hwsp(struct i915_request *from, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * CACHELINE_BYTES; } + +unlock: + mutex_unlock(&tl->mutex); } - mutex_unlock(&tl->mutex); + intel_timeline_put(tl); return err; } From e5de91e68c5ccaf4ef3e5aa265b469e186ebfa65 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 20 Sep 2019 09:12:54 +0100 Subject: [PATCH 196/331] Revert "drm/i915/tgl: Implement Wa_1406941453" Our sanitychecks indicate that while this register is context saved/restore, the HW does not preserve this bit within the register -- it likely doesn't exist, or one of those mythical bits that the architects insist does something despite all appearances to the contrary. For reference, SAMPLER_MODE is already in i915_reg.h as GEN10_SAMPLER_MODE and is being setup in icl_ctx_workarounds_init() as opposed to the chosen location here of rcs_engine_wa_init). Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111754 Fixes: 7f0cc34b5349 ("drm/i915/tgl: Implement Wa_1406941453") Testcase: igt/i915_selftest/live_workarounds Signed-off-by: Chris Wilson Cc: Lucas De Marchi Cc: Stuart Summers Cc: Radhakrishna Sripada Cc: Jani Nikula Cc: Joonas Lahtinen Acked-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20190920081254.18389-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 7 ------- drivers/gpu/drm/i915/i915_reg.h | 3 --- 2 files changed, 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 25ae608463987..ba65e50189780 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1260,13 +1260,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { struct drm_i915_private *i915 = engine->i915; - if (IS_GEN(i915, 12)) { - /* Wa_1406941453:tgl */ - wa_masked_en(wal, - SAMPLER_MODE, - SAMPLER_ENABLE_SMALL_PL); - } - if (IS_GEN(i915, 11)) { /* This is not an Wa. Enable for better image quality */ wa_masked_en(wal, diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 5e3a6178aff44..f8f52ae6cc6fe 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -8965,9 +8965,6 @@ enum { #define GEN9_DG_MIRROR_FIX_ENABLE (1 << 5) #define GEN9_CCS_TLB_PREFETCH_ENABLE (1 << 3) -#define SAMPLER_MODE _MMIO(0xe18c) -#define SAMPLER_ENABLE_SMALL_PL (1 << 15) - #define GEN8_ROW_CHICKEN _MMIO(0xe4f0) #define FLOW_CONTROL_ENABLE (1 << 15) #define PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE (1 << 8) From 35d97e43bb217446cdf128c6b6dfec423ed81ddf Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Thu, 19 Sep 2019 13:12:03 -0700 Subject: [PATCH 197/331] drm/i915/uc: Update HuC firmware naming convention and load latest HuC Make both GuC and HuC to use "." as the separator. Hardcode the separator in MAKE_UC_FW_PATH. Remove the usage of "ver" from HuC. The current convention being: _uc_..patch.bin Update the versions of HuC being loaded of the platforms. SKL - v2.0.0 BXT - v2.0.0 KBL - v4.0.0 GLK - v4.0.0 CFL - KBL v4.0.0 ICL - v9.0.0 CML - v4.0.0 v2: Remove the separator parameter altogether from __MAKE_UC_FW_PATH.(Daniele) - Squash all firmware update patches (Daniele) v3: s/huc/HuC - Correct the order of platforms - Change REVID of cml to 5(Michal) - Code space changes in huc_def (Daniele) Suggested-by: Daniele Ceraolo Spurio Cc: Michal Wajdeczko Signed-off-by: Anusha Srivatsa Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20190919201204.9691-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 27 ++++++++++++------------ 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 296a82603be0a..ea9a807abd4f3 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -39,26 +39,27 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, * Must be ordered based on platform + revid, from newer to older. */ #define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \ - fw_def(ELKHARTLAKE, 0, guc_def(ehl, 33, 0, 4), huc_def(ehl, 9, 0, 0)) \ - fw_def(ICELAKE, 0, guc_def(icl, 33, 0, 0), huc_def(icl, 8, 4, 3238)) \ - fw_def(COFFEELAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 02, 00, 1810)) \ - fw_def(GEMINILAKE, 0, guc_def(glk, 33, 0, 0), huc_def(glk, 03, 01, 2893)) \ - fw_def(KABYLAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 02, 00, 1810)) \ - fw_def(BROXTON, 0, guc_def(bxt, 33, 0, 0), huc_def(bxt, 01, 8, 2893)) \ - fw_def(SKYLAKE, 0, guc_def(skl, 33, 0, 0), huc_def(skl, 01, 07, 1398)) - -#define __MAKE_UC_FW_PATH(prefix_, name_, separator_, major_, minor_, patch_) \ + fw_def(ELKHARTLAKE, 0, guc_def(ehl, 33, 0, 4), huc_def(ehl, 9, 0, 0)) \ + fw_def(ICELAKE, 0, guc_def(icl, 33, 0, 0), huc_def(icl, 9, 0, 0)) \ + fw_def(COFFEELAKE, 5, guc_def(cml, 33, 0, 0), huc_def(cml, 4, 0, 0)) \ + fw_def(COFFEELAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 4, 0, 0)) \ + fw_def(GEMINILAKE, 0, guc_def(glk, 33, 0, 0), huc_def(glk, 4, 0, 0)) \ + fw_def(KABYLAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 4, 0, 0)) \ + fw_def(BROXTON, 0, guc_def(bxt, 33, 0, 0), huc_def(bxt, 2, 0, 0)) \ + fw_def(SKYLAKE, 0, guc_def(skl, 33, 0, 0), huc_def(skl, 2, 0, 0)) + +#define __MAKE_UC_FW_PATH(prefix_, name_, major_, minor_, patch_) \ "i915/" \ __stringify(prefix_) name_ \ - __stringify(major_) separator_ \ - __stringify(minor_) separator_ \ + __stringify(major_) "." \ + __stringify(minor_) "." \ __stringify(patch_) ".bin" #define MAKE_GUC_FW_PATH(prefix_, major_, minor_, patch_) \ - __MAKE_UC_FW_PATH(prefix_, "_guc_", ".", major_, minor_, patch_) + __MAKE_UC_FW_PATH(prefix_, "_guc_", major_, minor_, patch_) #define MAKE_HUC_FW_PATH(prefix_, major_, minor_, bld_num_) \ - __MAKE_UC_FW_PATH(prefix_, "_huc_ver", "_", major_, minor_, bld_num_) + __MAKE_UC_FW_PATH(prefix_, "_huc_", major_, minor_, bld_num_) /* All blobs need to be declared via MODULE_FIRMWARE() */ #define INTEL_UC_MODULE_FW(platform_, revid_, guc_, huc_) \ From 646d3dc855212855b708da4d481051a5e8cd1946 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 18 Jul 2019 17:50:43 +0300 Subject: [PATCH 198/331] drm/i915: Fix HSW+ DP MSA YCbCr colorspace indication MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Looks like we're currently setting the MSA to xvYCC BT.709 instead of the YCbCr BT.601 claimed by the comment. But even that comment is wrong since we configure the CSC matrix to BT.709. Let's remove the bogus statement from the comment and fix the MSA to indicate YCbCr BT.709 so that it matches the actual pixel data we're transmitting. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190718145053.25808-3-ville.syrjala@linux.intel.com Reviewed-by: Gwan-gyeong Mun --- drivers/gpu/drm/i915/display/intel_ddi.c | 6 ++++-- drivers/gpu/drm/i915/i915_reg.h | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 3e6394139964d..fb6cb6aca188e 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -1730,10 +1730,12 @@ void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state) /* * As per DP 1.2 spec section 2.3.4.3 while sending * YCBCR 444 signals we should program MSA MISC1/0 fields with - * colorspace information. The output colorspace encoding is BT601. + * colorspace information. */ if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR444) - temp |= TRANS_MSA_SAMPLING_444 | TRANS_MSA_CLRSP_YCBCR; + temp |= TRANS_MSA_SAMPLING_444 | TRANS_MSA_CLRSP_YCBCR | + TRANS_MSA_YCBCR_BT709; + /* * As per DP 1.4a spec section 2.2.4.3 [MSA Field for Indication * of Color Encoding Format and Content Color Gamut] while sending diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index f8f52ae6cc6fe..2a1a1e84d1bc3 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -9733,7 +9733,8 @@ enum skl_power_gate { #define TRANS_MSA_SYNC_CLK (1 << 0) #define TRANS_MSA_SAMPLING_444 (2 << 1) -#define TRANS_MSA_CLRSP_YCBCR (2 << 3) +#define TRANS_MSA_CLRSP_YCBCR (1 << 3) +#define TRANS_MSA_YCBCR_BT709 (1 << 4) #define TRANS_MSA_6_BPC (0 << 5) #define TRANS_MSA_8_BPC (1 << 5) #define TRANS_MSA_10_BPC (2 << 5) From 791ad5f1e1af2926adef5bb1998eb445681e31f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 18 Jul 2019 17:50:44 +0300 Subject: [PATCH 199/331] drm/i915: Fix AVI infoframe quantization range for YCbCr output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We're configuring the AVI infoframe quantization range bits as if we're always transmitting RGB pixels. Let's fix this so that we correctly indicate limited range YCC quantization range when transmitting YCbCr instead. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190718145053.25808-4-ville.syrjala@linux.intel.com Reviewed-by: Gwan-gyeong Mun --- drivers/gpu/drm/i915/display/intel_hdmi.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index 04a8718bd2d71..0082c46202462 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -724,11 +724,16 @@ intel_hdmi_compute_avi_infoframe(struct intel_encoder *encoder, drm_hdmi_avi_infoframe_colorspace(frame, conn_state); - drm_hdmi_avi_infoframe_quant_range(frame, connector, - adjusted_mode, - crtc_state->limited_color_range ? - HDMI_QUANTIZATION_RANGE_LIMITED : - HDMI_QUANTIZATION_RANGE_FULL); + if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_RGB) { + drm_hdmi_avi_infoframe_quant_range(frame, connector, + adjusted_mode, + crtc_state->limited_color_range ? + HDMI_QUANTIZATION_RANGE_LIMITED : + HDMI_QUANTIZATION_RANGE_FULL); + } else { + frame->quantization_range = HDMI_QUANTIZATION_RANGE_DEFAULT; + frame->ycc_quantization_range = HDMI_YCC_QUANTIZATION_RANGE_LIMITED; + } drm_hdmi_avi_infoframe_content_type(frame, conn_state); From ba2d08c2a9ad96e592ee357ef5d46cbe1dc18bc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 18 Jul 2019 17:50:45 +0300 Subject: [PATCH 200/331] drm/i915: Extract intel_hdmi_limited_color_range() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pull the code for computing the limited color range setting into a small helper. We'll add a bit more to it later. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190718145053.25808-5-ville.syrjala@linux.intel.com Reviewed-by: Gwan-gyeong Mun --- drivers/gpu/drm/i915/display/intel_hdmi.c | 30 +++++++++++++++-------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index 0082c46202462..b781e0d1962ef 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -2365,6 +2365,24 @@ static int intel_hdmi_compute_clock(struct intel_encoder *encoder, return 0; } +static bool intel_hdmi_limited_color_range(const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) +{ + const struct intel_digital_connector_state *intel_conn_state = + to_intel_digital_connector_state(conn_state); + const struct drm_display_mode *adjusted_mode = + &crtc_state->base.adjusted_mode; + + if (intel_conn_state->broadcast_rgb == INTEL_BROADCAST_RGB_AUTO) { + /* See CEA-861-E - 5.1 Default Encoding Parameters */ + return crtc_state->has_hdmi_sink && + drm_default_rgb_quant_range(adjusted_mode) == + HDMI_QUANTIZATION_RANGE_LIMITED; + } else { + return intel_conn_state->broadcast_rgb == INTEL_BROADCAST_RGB_LIMITED; + } +} + int intel_hdmi_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) @@ -2388,16 +2406,8 @@ int intel_hdmi_compute_config(struct intel_encoder *encoder, if (pipe_config->has_hdmi_sink) pipe_config->has_infoframe = true; - if (intel_conn_state->broadcast_rgb == INTEL_BROADCAST_RGB_AUTO) { - /* See CEA-861-E - 5.1 Default Encoding Parameters */ - pipe_config->limited_color_range = - pipe_config->has_hdmi_sink && - drm_default_rgb_quant_range(adjusted_mode) == - HDMI_QUANTIZATION_RANGE_LIMITED; - } else { - pipe_config->limited_color_range = - intel_conn_state->broadcast_rgb == INTEL_BROADCAST_RGB_LIMITED; - } + pipe_config->limited_color_range = + intel_hdmi_limited_color_range(pipe_config, conn_state); if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) pipe_config->pixel_multiplier = 2; From cae154fcaefeb1ddab491d6fb651452a93bbf81d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 18 Jul 2019 19:45:23 +0300 Subject: [PATCH 201/331] drm/i915: Never set limited_color_range=true for YCbCr output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit crtc_state->limited_color_range only applies to RGB output but we're currently setting it even for YCbCr output. That will lead to conflicting MSA and PIPECONF settings which can mess up the image. Let's make sure limited_color_range stays unset with YCbCr output. Also WARN if we end up with such a bogus combination when programming the MSA MISC bits as it's impossible to even indicate quantization rangle for YCbCr via MSA MISC. YCbCr output is simply assumed to be limited range always. Note that VSC SDP does provide a mechanism for full range YCbCr, so in the future we may want to rethink how we compute/store this state. And for good measure we add the same WARN to the HDMI path. v2: s/==/!=/ in the HDMI WARN Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190718164523.11738-1-ville.syrjala@linux.intel.com Reviewed-by: Gwan-gyeong Mun --- drivers/gpu/drm/i915/display/intel_ddi.c | 10 +++++++--- drivers/gpu/drm/i915/display/intel_dp.c | 10 ++++++++++ drivers/gpu/drm/i915/display/intel_hdmi.c | 20 +++++++++++++++++--- 3 files changed, 34 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index fb6cb6aca188e..0c0da9f6c2e88 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -1706,9 +1706,6 @@ void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state) temp = TRANS_MSA_SYNC_CLK; - if (crtc_state->limited_color_range) - temp |= TRANS_MSA_CEA_RANGE; - switch (crtc_state->pipe_bpp) { case 18: temp |= TRANS_MSA_6_BPC; @@ -1727,6 +1724,13 @@ void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state) break; } + /* nonsense combination */ + WARN_ON(crtc_state->limited_color_range && + crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB); + + if (crtc_state->limited_color_range) + temp |= TRANS_MSA_CEA_RANGE; + /* * As per DP 1.2 spec section 2.3.4.3 while sending * YCBCR 444 signals we should program MSA MISC1/0 fields with diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 0c7755e915d91..5cce9459e93c0 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -2150,6 +2150,16 @@ bool intel_dp_limited_color_range(const struct intel_crtc_state *crtc_state, const struct drm_display_mode *adjusted_mode = &crtc_state->base.adjusted_mode; + /* + * Our YCbCr output is always limited range. + * crtc_state->limited_color_range only applies to RGB, + * and it must never be set for YCbCr or we risk setting + * some conflicting bits in PIPECONF which will mess up + * the colors on the monitor. + */ + if (crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB) + return false; + if (intel_conn_state->broadcast_rgb == INTEL_BROADCAST_RGB_AUTO) { /* * See: diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index b781e0d1962ef..5ab73331d35df 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -724,6 +724,10 @@ intel_hdmi_compute_avi_infoframe(struct intel_encoder *encoder, drm_hdmi_avi_infoframe_colorspace(frame, conn_state); + /* nonsense combination */ + WARN_ON(crtc_state->limited_color_range && + crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB); + if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_RGB) { drm_hdmi_avi_infoframe_quant_range(frame, connector, adjusted_mode, @@ -2373,6 +2377,16 @@ static bool intel_hdmi_limited_color_range(const struct intel_crtc_state *crtc_s const struct drm_display_mode *adjusted_mode = &crtc_state->base.adjusted_mode; + /* + * Our YCbCr output is always limited range. + * crtc_state->limited_color_range only applies to RGB, + * and it must never be set for YCbCr or we risk setting + * some conflicting bits in PIPECONF which will mess up + * the colors on the monitor. + */ + if (crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB) + return false; + if (intel_conn_state->broadcast_rgb == INTEL_BROADCAST_RGB_AUTO) { /* See CEA-861-E - 5.1 Default Encoding Parameters */ return crtc_state->has_hdmi_sink && @@ -2406,9 +2420,6 @@ int intel_hdmi_compute_config(struct intel_encoder *encoder, if (pipe_config->has_hdmi_sink) pipe_config->has_infoframe = true; - pipe_config->limited_color_range = - intel_hdmi_limited_color_range(pipe_config, conn_state); - if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) pipe_config->pixel_multiplier = 2; @@ -2419,6 +2430,9 @@ int intel_hdmi_compute_config(struct intel_encoder *encoder, } } + pipe_config->limited_color_range = + intel_hdmi_limited_color_range(pipe_config, conn_state); + if (HAS_PCH_SPLIT(dev_priv) && !HAS_DDI(dev_priv)) pipe_config->has_pch_encoder = true; From 60a02311cc588ae92b4732763650f187c506bc3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 18 Jul 2019 17:50:48 +0300 Subject: [PATCH 202/331] drm/i915: Don't look at unrelated PIPECONF bits for interlaced readout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since HSW the PIPECONF progressive vs. interlaced selection is done with just two bits instead of the earlier three. Let's not look at the extra bit on HSW+. Also gen2 doesn't support interlaced displays at all. This is actually fine as is currently because the extra bit is mbz (as are all three bits on gen2). But just to avoid mishaps in the future if the bits get reused let's only look at what's properly defined. v2: constify crtc_state Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190718145053.25808-8-ville.syrjala@linux.intel.com Reviewed-by: Gwan-gyeong Mun --- drivers/gpu/drm/i915/display/intel_display.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index e0033d99f6e34..44460bf70d6bf 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -8203,6 +8203,21 @@ static void intel_set_pipe_src_size(const struct intel_crtc_state *crtc_state) (crtc_state->pipe_src_h - 1)); } +static bool intel_pipe_is_interlaced(const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; + + if (IS_GEN(dev_priv, 2)) + return false; + + if (INTEL_GEN(dev_priv) >= 9 || + IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv)) + return I915_READ(PIPECONF(cpu_transcoder)) & PIPECONF_INTERLACE_MASK_HSW; + else + return I915_READ(PIPECONF(cpu_transcoder)) & PIPECONF_INTERLACE_MASK; +} + static void intel_get_pipe_timings(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config) { @@ -8241,7 +8256,7 @@ static void intel_get_pipe_timings(struct intel_crtc *crtc, pipe_config->base.adjusted_mode.crtc_vsync_start = (tmp & 0xffff) + 1; pipe_config->base.adjusted_mode.crtc_vsync_end = ((tmp >> 16) & 0xffff) + 1; - if (I915_READ(PIPECONF(cpu_transcoder)) & PIPECONF_INTERLACE_MASK) { + if (intel_pipe_is_interlaced(pipe_config)) { pipe_config->base.adjusted_mode.flags |= DRM_MODE_FLAG_INTERLACE; pipe_config->base.adjusted_mode.crtc_vtotal += 1; pipe_config->base.adjusted_mode.crtc_vblank_end += 1; From b10d1173eecf811d5293c5097f1025fc20915171 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 18 Jul 2019 17:50:49 +0300 Subject: [PATCH 203/331] drm/i915: Simplify intel_get_crtc_ycbcr_config() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make intel_get_crtc_ycbcr_config() simpler and rename it to bdw_get_pipemisc_output_format() to better reflect what it does. Also toss in some comments to document that the 4:2:0 PIPECONF bits are glk+ only. They are mbz on earlier platforms so reading them unconditionally is safe however. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190718145053.25808-9-ville.syrjala@linux.intel.com Reviewed-by: Gwan-gyeong Mun --- drivers/gpu/drm/i915/display/intel_display.c | 71 +++++++++----------- drivers/gpu/drm/i915/i915_reg.h | 4 +- 2 files changed, 34 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 44460bf70d6bf..95a63acc43a30 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -8727,47 +8727,24 @@ static void chv_crtc_clock_get(struct intel_crtc *crtc, pipe_config->port_clock = chv_calc_dpll_params(refclk, &clock); } -static void intel_get_crtc_ycbcr_config(struct intel_crtc *crtc, - struct intel_crtc_state *pipe_config) +static enum intel_output_format +bdw_get_pipemisc_output_format(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - enum intel_output_format output = INTEL_OUTPUT_FORMAT_RGB; - - pipe_config->lspcon_downsampling = false; - - if (IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9) { - u32 tmp = I915_READ(PIPEMISC(crtc->pipe)); - - if (tmp & PIPEMISC_OUTPUT_COLORSPACE_YUV) { - bool ycbcr420_enabled = tmp & PIPEMISC_YUV420_ENABLE; - bool blend = tmp & PIPEMISC_YUV420_MODE_FULL_BLEND; - - if (ycbcr420_enabled) { - /* We support 4:2:0 in full blend mode only */ - if (!blend) - output = INTEL_OUTPUT_FORMAT_INVALID; - else if (!(IS_GEMINILAKE(dev_priv) || - INTEL_GEN(dev_priv) >= 10)) - output = INTEL_OUTPUT_FORMAT_INVALID; - else - output = INTEL_OUTPUT_FORMAT_YCBCR420; - } else { - /* - * Currently there is no interface defined to - * check user preference between RGB/YCBCR444 - * or YCBCR420. So the only possible case for - * YCBCR444 usage is driving YCBCR420 output - * with LSPCON, when pipe is configured for - * YCBCR444 output and LSPCON takes care of - * downsampling it. - */ - pipe_config->lspcon_downsampling = true; - output = INTEL_OUTPUT_FORMAT_YCBCR444; - } - } - } + u32 tmp; + + tmp = I915_READ(PIPEMISC(crtc->pipe)); - pipe_config->output_format = output; + if (tmp & PIPEMISC_YUV420_ENABLE) { + /* We support 4:2:0 in full blend mode only */ + WARN_ON((tmp & PIPEMISC_YUV420_MODE_FULL_BLEND) == 0); + + return INTEL_OUTPUT_FORMAT_YCBCR420; + } else if (tmp & PIPEMISC_OUTPUT_COLORSPACE_YUV) { + return INTEL_OUTPUT_FORMAT_YCBCR444; + } else { + return INTEL_OUTPUT_FORMAT_RGB; + } } static void i9xx_get_pipe_color_config(struct intel_crtc_state *crtc_state) @@ -10462,7 +10439,23 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc, } intel_get_pipe_src_size(crtc, pipe_config); - intel_get_crtc_ycbcr_config(crtc, pipe_config); + + if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv)) { + pipe_config->output_format = + bdw_get_pipemisc_output_format(crtc); + + /* + * Currently there is no interface defined to + * check user preference between RGB/YCBCR444 + * or YCBCR420. So the only possible case for + * YCBCR444 usage is driving YCBCR420 output + * with LSPCON, when pipe is configured for + * YCBCR444 output and LSPCON takes care of + * downsampling it. + */ + pipe_config->lspcon_downsampling = + pipe_config->output_format == INTEL_OUTPUT_FORMAT_YCBCR444; + } pipe_config->gamma_mode = I915_READ(GAMMA_MODE(crtc->pipe)); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 2a1a1e84d1bc3..3cc1baa0b9152 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -5775,8 +5775,8 @@ enum { #define _PIPE_MISC_A 0x70030 #define _PIPE_MISC_B 0x71030 -#define PIPEMISC_YUV420_ENABLE (1 << 27) -#define PIPEMISC_YUV420_MODE_FULL_BLEND (1 << 26) +#define PIPEMISC_YUV420_ENABLE (1 << 27) /* glk+ */ +#define PIPEMISC_YUV420_MODE_FULL_BLEND (1 << 26) /* glk+ */ #define PIPEMISC_HDR_MODE_PRECISION (1 << 23) /* icl+ */ #define PIPEMISC_OUTPUT_COLORSPACE_YUV (1 << 11) #define PIPEMISC_DITHER_BPC_MASK (7 << 5) From ac0f01cee94702d49aad3ff4d0607024fa34c9dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 18 Jul 2019 17:50:50 +0300 Subject: [PATCH 204/331] drm/i915: Add PIPECONF YCbCr 4:4:4 programming for HSW MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On HSW the pipe colorspace is configured via PIPECONF (as opposed to PIPEMISC in BDW+). Let's configure+readout that stuff correctly. Enabling YCbCr 4:4:4 output will now be a simple matter of setting crtc_state->output_format appropriately in the encoder .compute_config(). Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190718145053.25808-10-ville.syrjala@linux.intel.com Reviewed-by: Gwan-gyeong Mun --- drivers/gpu/drm/i915/display/intel_display.c | 13 ++++++++++++- drivers/gpu/drm/i915/i915_reg.h | 1 + 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 95a63acc43a30..1bd91a061a0a8 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -9444,6 +9444,10 @@ static void haswell_set_pipeconf(const struct intel_crtc_state *crtc_state) else val |= PIPECONF_PROGRESSIVE; + if (IS_HASWELL(dev_priv) && + crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB) + val |= PIPECONF_OUTPUT_COLORSPACE_YUV_HSW; + I915_WRITE(PIPECONF(cpu_transcoder), val); POSTING_READ(PIPECONF(cpu_transcoder)); } @@ -10440,7 +10444,14 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc, intel_get_pipe_src_size(crtc, pipe_config); - if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv)) { + if (IS_HASWELL(dev_priv)) { + u32 tmp = I915_READ(PIPECONF(pipe_config->cpu_transcoder)); + + if (tmp & PIPECONF_OUTPUT_COLORSPACE_YUV_HSW) + pipe_config->output_format = INTEL_OUTPUT_FORMAT_YCBCR444; + else + pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; + } else { pipe_config->output_format = bdw_get_pipemisc_output_format(crtc); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 3cc1baa0b9152..6f2eda76da4a4 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -5683,6 +5683,7 @@ enum { #define PIPECONF_CXSR_DOWNCLOCK (1 << 16) #define PIPECONF_EDP_RR_MODE_SWITCH_VLV (1 << 14) #define PIPECONF_COLOR_RANGE_SELECT (1 << 13) +#define PIPECONF_OUTPUT_COLORSPACE_YUV_HSW (1 << 11) /* hsw only */ #define PIPECONF_BPC_MASK (0x7 << 5) #define PIPECONF_8BPC (0 << 5) #define PIPECONF_10BPC (1 << 5) From 174d12bcc0874d78ad7fd63ae769cac864ee9f4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 18 Jul 2019 17:50:51 +0300 Subject: [PATCH 205/331] drm/i915: Document ILK+ pipe csc matrix better MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add comments to explain the ilk pipe csc operation a bit better. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190718145053.25808-11-ville.syrjala@linux.intel.com Reviewed-by: Gwan-gyeong Mun --- drivers/gpu/drm/i915/display/intel_color.c | 26 +++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index 318308dc136cb..de31b4c251faf 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -42,6 +42,21 @@ #define LEGACY_LUT_LENGTH 256 +/* + * ILK+ csc matrix: + * + * |R/Cr| | c0 c1 c2 | ( |R/Cr| |preoff0| ) |postoff0| + * |G/Y | = | c3 c4 c5 | x ( |G/Y | + |preoff1| ) + |postoff1| + * |B/Cb| | c6 c7 c8 | ( |B/Cb| |preoff2| ) |postoff2| + * + * ILK/SNB don't have explicit post offsets, and instead + * CSC_MODE_YUV_TO_RGB and CSC_BLACK_SCREEN_OFFSET are used: + * CSC_MODE_YUV_TO_RGB=0 + CSC_BLACK_SCREEN_OFFSET=0 -> 1/2, 0, 1/2 + * CSC_MODE_YUV_TO_RGB=0 + CSC_BLACK_SCREEN_OFFSET=1 -> 1/2, 1/16, 1/2 + * CSC_MODE_YUV_TO_RGB=1 + CSC_BLACK_SCREEN_OFFSET=0 -> 0, 0, 0 + * CSC_MODE_YUV_TO_RGB=1 + CSC_BLACK_SCREEN_OFFSET=1 -> 1/16, 1/16, 1/16 + */ + /* * Extract the CSC coefficient from a CTM coefficient (in U32.32 fixed point * format). This macro takes the coefficient we want transformed and the @@ -59,37 +74,38 @@ #define ILK_CSC_POSTOFF_LIMITED_RANGE (16 * (1 << 12) / 255) +/* Nop pre/post offsets */ static const u16 ilk_csc_off_zero[3] = {}; +/* Identity matrix */ static const u16 ilk_csc_coeff_identity[9] = { ILK_CSC_COEFF_1_0, 0, 0, 0, ILK_CSC_COEFF_1_0, 0, 0, 0, ILK_CSC_COEFF_1_0, }; +/* Limited range RGB post offsets */ static const u16 ilk_csc_postoff_limited_range[3] = { ILK_CSC_POSTOFF_LIMITED_RANGE, ILK_CSC_POSTOFF_LIMITED_RANGE, ILK_CSC_POSTOFF_LIMITED_RANGE, }; +/* Full range RGB -> limited range RGB matrix */ static const u16 ilk_csc_coeff_limited_range[9] = { ILK_CSC_COEFF_LIMITED_RANGE, 0, 0, 0, ILK_CSC_COEFF_LIMITED_RANGE, 0, 0, 0, ILK_CSC_COEFF_LIMITED_RANGE, }; -/* - * These values are direct register values specified in the Bspec, - * for RGB->YUV conversion matrix (colorspace BT709) - */ +/* BT.709 full range RGB -> limited range YCbCr matrix */ static const u16 ilk_csc_coeff_rgb_to_ycbcr[9] = { 0x1e08, 0x9cc0, 0xb528, 0x2ba8, 0x09d8, 0x37e8, 0xbce8, 0x9ad8, 0x1e08, }; -/* Post offset values for RGB->YCBCR conversion */ +/* Limited range YCbCr post offsets */ static const u16 ilk_csc_postoff_rgb_to_ycbcr[3] = { 0x0800, 0x0100, 0x0800, }; From af28cc4c289600ad05da85e466517757a26ce216 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 18 Jul 2019 17:50:52 +0300 Subject: [PATCH 206/331] drm/i915: Set up ILK/SNB csc unit properly for YCbCr output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prepare the pipe csc for YCbCr output on ilk/snb. The main difference to IVB+ is the lack of explicit post offsets, and instead we must configure the CSC info RGB->YUV mode (which takes care of offsetting Cb/Cr properly) and enable the "black screen offset" bit to add the required offset to Y. And while at it throw some comments around the bit defines to document which platforms have which bits. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190718145053.25808-12-ville.syrjala@linux.intel.com Reviewed-by: Gwan-gyeong Mun --- drivers/gpu/drm/i915/display/intel_color.c | 25 +++++++++++++++++----- drivers/gpu/drm/i915/i915_reg.h | 10 ++++----- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index de31b4c251faf..2de640bd3e738 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -1213,6 +1213,21 @@ static u32 ilk_gamma_mode(const struct intel_crtc_state *crtc_state) return GAMMA_MODE_MODE_10BIT; } +static u32 ilk_csc_mode(const struct intel_crtc_state *crtc_state) +{ + /* + * CSC comes after the LUT in RGB->YCbCr mode. + * RGB->YCbCr needs the limited range offsets added to + * the output. RGB limited range output is handled by + * the hw automagically elsewhere. + */ + if (crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB) + return CSC_BLACK_SCREEN_OFFSET; + + return CSC_MODE_YUV_TO_RGB | + CSC_POSITION_BEFORE_GAMMA; +} + static int ilk_color_check(struct intel_crtc_state *crtc_state) { int ret; @@ -1226,15 +1241,15 @@ static int ilk_color_check(struct intel_crtc_state *crtc_state) !crtc_state->c8_planes; /* - * We don't expose the ctm on ilk/snb currently, - * nor do we enable YCbCr output. Also RGB limited - * range output is handled by the hw automagically. + * We don't expose the ctm on ilk/snb currently, also RGB + * limited range output is handled by the hw automagically. */ - crtc_state->csc_enable = false; + crtc_state->csc_enable = + crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB; crtc_state->gamma_mode = ilk_gamma_mode(crtc_state); - crtc_state->csc_mode = 0; + crtc_state->csc_mode = ilk_csc_mode(crtc_state); ret = intel_color_add_affected_planes(crtc_state); if (ret) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 6f2eda76da4a4..90c45fee9ce71 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -10249,11 +10249,11 @@ enum skl_power_gate { #define _PIPE_A_CSC_COEFF_BV 0x49024 #define _PIPE_A_CSC_MODE 0x49028 -#define ICL_CSC_ENABLE (1 << 31) -#define ICL_OUTPUT_CSC_ENABLE (1 << 30) -#define CSC_BLACK_SCREEN_OFFSET (1 << 2) -#define CSC_POSITION_BEFORE_GAMMA (1 << 1) -#define CSC_MODE_YUV_TO_RGB (1 << 0) +#define ICL_CSC_ENABLE (1 << 31) /* icl+ */ +#define ICL_OUTPUT_CSC_ENABLE (1 << 30) /* icl+ */ +#define CSC_BLACK_SCREEN_OFFSET (1 << 2) /* ilk/snb */ +#define CSC_POSITION_BEFORE_GAMMA (1 << 1) /* pre-glk */ +#define CSC_MODE_YUV_TO_RGB (1 << 0) /* ilk/snb */ #define _PIPE_A_CSC_PREOFF_HI 0x49030 #define _PIPE_A_CSC_PREOFF_ME 0x49034 From d1844606fd63af1994e153a8607d4b13709bc395 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 18 Jul 2019 17:50:53 +0300 Subject: [PATCH 207/331] drm/i915: Add PIPECONF YCbCr 4:4:4 programming for ILK-IVB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On ILK-IVB the pipe colorspace is configured via PIPECONF (as opposed to PIPEMISC in BDW+). Let's configure+readout that stuff correctly. Enabling YCbCr 4:4:4 output will now be a simple matter of setting crtc_state->output_format appropriately in the encoder .compute_config(). However, when we do that we must be aware of the fact that YCbCr DP output doesn't seem to work on ILK (resulting image is totally garbled), but on SNB+ it works fine. However HDMI YCbCr output does work correctly even on ILK. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190718145053.25808-13-ville.syrjala@linux.intel.com Reviewed-by: Gwan-gyeong Mun --- drivers/gpu/drm/i915/display/intel_display.c | 21 +++++++++++++++++++- drivers/gpu/drm/i915/i915_reg.h | 4 ++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 1bd91a061a0a8..f5f655cde43a9 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -9420,9 +9420,19 @@ static void ironlake_set_pipeconf(const struct intel_crtc_state *crtc_state) else val |= PIPECONF_PROGRESSIVE; + /* + * This would end up with an odd purple hue over + * the entire display. Make sure we don't do it. + */ + WARN_ON(crtc_state->limited_color_range && + crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB); + if (crtc_state->limited_color_range) val |= PIPECONF_COLOR_RANGE_SELECT; + if (crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB) + val |= PIPECONF_OUTPUT_COLORSPACE_YUV709; + val |= PIPECONF_GAMMA_MODE(crtc_state->gamma_mode); I915_WRITE(PIPECONF(pipe), val); @@ -9959,7 +9969,6 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc, if (!wakeref) return false; - pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; pipe_config->cpu_transcoder = (enum transcoder) crtc->pipe; pipe_config->shared_dpll = NULL; @@ -9988,6 +9997,16 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc, if (tmp & PIPECONF_COLOR_RANGE_SELECT) pipe_config->limited_color_range = true; + switch (tmp & PIPECONF_OUTPUT_COLORSPACE_MASK) { + case PIPECONF_OUTPUT_COLORSPACE_YUV601: + case PIPECONF_OUTPUT_COLORSPACE_YUV709: + pipe_config->output_format = INTEL_OUTPUT_FORMAT_YCBCR444; + break; + default: + pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; + break; + } + pipe_config->gamma_mode = (tmp & PIPECONF_GAMMA_MODE_MASK_ILK) >> PIPECONF_GAMMA_MODE_SHIFT; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 90c45fee9ce71..acbda6d89a9bc 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -5683,6 +5683,10 @@ enum { #define PIPECONF_CXSR_DOWNCLOCK (1 << 16) #define PIPECONF_EDP_RR_MODE_SWITCH_VLV (1 << 14) #define PIPECONF_COLOR_RANGE_SELECT (1 << 13) +#define PIPECONF_OUTPUT_COLORSPACE_MASK (3 << 11) /* ilk-ivb */ +#define PIPECONF_OUTPUT_COLORSPACE_RGB (0 << 11) /* ilk-ivb */ +#define PIPECONF_OUTPUT_COLORSPACE_YUV601 (1 << 11) /* ilk-ivb */ +#define PIPECONF_OUTPUT_COLORSPACE_YUV709 (2 << 11) /* ilk-ivb */ #define PIPECONF_OUTPUT_COLORSPACE_YUV_HSW (1 << 11) /* hsw only */ #define PIPECONF_BPC_MASK (0x7 << 5) #define PIPECONF_8BPC (0 << 5) From 601734f7aabd46d3a988554e59908d9de7f8b013 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 13 Sep 2019 08:51:37 +0100 Subject: [PATCH 208/331] drm/i915/tgl: s/ss/eu fuse reading support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gen12 has dual-subslices (DSS), which compared to gen11 subslices have some duplicated resources/paths. Although DSS behave similarly to 2 subslices, instead of splitting this and presenting userspace with bits not directly representative of hardware resources, present userspace with a subslice_mask made up of DSS bits instead. v2: GEM_BUG_ON on mask size (Lionel) Bspec: 29547 Bspec: 12247 Cc: Kelvin Gardiner Cc: Tvrtko Ursulin Cc: Lionel Landwerlin CC: Radhakrishna Sripada Cc: Michel Thierry #v1 Cc: Daniele Ceraolo Spurio Cc: José Roberto de Souza Signed-off-by: Daniele Ceraolo Spurio Signed-off-by: James Ausmus Signed-off-by: Oscar Mateo Signed-off-by: Sudeep Dutt Signed-off-by: Stuart Summers Signed-off-by: Mika Kuoppala Acked-by: Lionel Landwerlin Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20190913075137.18476-2-chris@chris-wilson.co.uk Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/gt/intel_sseu.h | 9 +-- drivers/gpu/drm/i915/i915_debugfs.c | 3 +- drivers/gpu/drm/i915/i915_reg.h | 2 + drivers/gpu/drm/i915/intel_device_info.c | 83 ++++++++++++++++++------ include/uapi/drm/i915_drm.h | 6 +- 5 files changed, 72 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index 4070f6ff1db60..d1d225204f09d 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -18,12 +18,13 @@ struct drm_i915_private; #define GEN_MAX_SUBSLICES (8) /* ICL upper bound */ #define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE) #define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES) -#define GEN_MAX_EUS (10) /* HSW upper bound */ +#define GEN_MAX_EUS (16) /* TGL upper bound */ #define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS) struct sseu_dev_info { u8 slice_mask; u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE]; + u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE]; u16 eu_total; u8 eu_per_subslice; u8 min_eu_in_pool; @@ -40,12 +41,6 @@ struct sseu_dev_info { u8 ss_stride; u8 eu_stride; - - /* We don't have more than 8 eus per subslice at the moment and as we - * store eus enabled using bits, no need to multiply by eus per - * subslice. - */ - u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES]; }; /* diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 43db50095257c..b5b449a88cf1d 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3823,7 +3823,8 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv, for (ss = 0; ss < info->sseu.max_subslices; ss++) { unsigned int eu_cnt; - if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss)))) + if (info->sseu.has_subslice_pg && + !(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss)))) /* skip disabled subslice */ continue; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index acbda6d89a9bc..7c64768976ac6 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2956,6 +2956,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN11_GT_SUBSLICE_DISABLE _MMIO(0x913C) +#define GEN12_GT_DSS_ENABLE _MMIO(0x913C) + #define GEN6_BSD_SLEEP_PSMI_CONTROL _MMIO(0x12050) #define GEN6_BSD_SLEEP_MSG_DISABLE (1 << 0) #define GEN6_BSD_SLEEP_FLUSH_DISABLE (1 << 2) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 728c881718a2d..85e480bdc6737 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -182,13 +182,69 @@ static u16 compute_eu_total(const struct sseu_dev_info *sseu) return total; } +static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, + u8 s_en, u32 ss_en, u16 eu_en) +{ + int s, ss; + + /* ss_en represents entire subslice mask across all slices */ + GEM_BUG_ON(sseu->max_slices * sseu->max_subslices > + sizeof(ss_en) * BITS_PER_BYTE); + + for (s = 0; s < sseu->max_slices; s++) { + if ((s_en & BIT(s)) == 0) + continue; + + sseu->slice_mask |= BIT(s); + + intel_sseu_set_subslices(sseu, s, ss_en); + + for (ss = 0; ss < sseu->max_subslices; ss++) + if (intel_sseu_has_subslice(sseu, s, ss)) + sseu_set_eus(sseu, s, ss, eu_en); + } + sseu->eu_per_subslice = hweight16(eu_en); + sseu->eu_total = compute_eu_total(sseu); +} + +static void gen12_sseu_info_init(struct drm_i915_private *dev_priv) +{ + struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; + u8 s_en; + u32 dss_en; + u16 eu_en = 0; + u8 eu_en_fuse; + int eu; + + /* + * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS. + * Instead of splitting these, provide userspace with an array + * of DSS to more closely represent the hardware resource. + */ + intel_sseu_set_info(sseu, 1, 6, 16); + + s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK; + + dss_en = I915_READ(GEN12_GT_DSS_ENABLE); + + /* one bit per pair of EUs */ + eu_en_fuse = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK); + for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++) + if (eu_en_fuse & BIT(eu)) + eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1); + + gen11_compute_sseu_info(sseu, s_en, dss_en, eu_en); + + /* TGL only supports slice-level power gating */ + sseu->has_slice_pg = 1; +} + static void gen11_sseu_info_init(struct drm_i915_private *dev_priv) { struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; u8 s_en; - u32 ss_en, ss_en_mask; + u32 ss_en; u8 eu_en; - int s; if (IS_ELKHARTLAKE(dev_priv)) intel_sseu_set_info(sseu, 1, 4, 8); @@ -197,26 +253,9 @@ static void gen11_sseu_info_init(struct drm_i915_private *dev_priv) s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK; ss_en = ~I915_READ(GEN11_GT_SUBSLICE_DISABLE); - ss_en_mask = BIT(sseu->max_subslices) - 1; eu_en = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK); - for (s = 0; s < sseu->max_slices; s++) { - if (s_en & BIT(s)) { - int ss_idx = sseu->max_subslices * s; - int ss; - - sseu->slice_mask |= BIT(s); - - intel_sseu_set_subslices(sseu, s, (ss_en >> ss_idx) & - ss_en_mask); - - for (ss = 0; ss < sseu->max_subslices; ss++) - if (intel_sseu_has_subslice(sseu, s, ss)) - sseu_set_eus(sseu, s, ss, eu_en); - } - } - sseu->eu_per_subslice = hweight8(eu_en); - sseu->eu_total = compute_eu_total(sseu); + gen11_compute_sseu_info(sseu, s_en, ss_en, eu_en); /* ICL has no power gating restrictions. */ sseu->has_slice_pg = 1; @@ -955,8 +994,10 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) gen9_sseu_info_init(dev_priv); else if (IS_GEN(dev_priv, 10)) gen10_sseu_info_init(dev_priv); - else if (INTEL_GEN(dev_priv) >= 11) + else if (IS_GEN(dev_priv, 11)) gen11_sseu_info_init(dev_priv); + else if (INTEL_GEN(dev_priv) >= 12) + gen12_sseu_info_init(dev_priv); if (IS_GEN(dev_priv, 6) && intel_vtd_active()) { DRM_INFO("Disabling ppGTT for VT-d support\n"); diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 469dc512cca35..30c5421440166 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -2033,8 +2033,10 @@ struct drm_i915_query { * (data[X / 8] >> (X % 8)) & 1 * * - the subslice mask for each slice with one bit per subslice telling - * whether a subslice is available. The availability of subslice Y in slice - * X can be queried with the following formula : + * whether a subslice is available. Gen12 has dual-subslices, which are + * similar to two gen11 subslices. For gen12, this array represents dual- + * subslices. The availability of subslice Y in slice X can be queried + * with the following formula : * * (data[subslice_offset + * X * subslice_stride + From bf93b7246548289a797798051198c9664af2931c Mon Sep 17 00:00:00 2001 From: Swati Sharma Date: Sun, 22 Sep 2019 01:40:52 +0530 Subject: [PATCH 209/331] drm/i915/color: Fix formatting issues Fixed few formatting issues in multi-segmented load_lut(). v3: -style nitting [Jani] -balanced parentheses moved from patch 2 to 1 [Jani] -subject prefix change [Jani] -added commit message [Jani] v4: -rearranged INDEX register write in ilk_read_luts() Signed-off-by: Swati Sharma Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1569096654-24433-2-git-send-email-swati2.sharma@intel.com --- drivers/gpu/drm/i915/display/intel_color.c | 40 ++++++++++------------ 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index 2de640bd3e738..91435ba426d01 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -823,11 +823,11 @@ icl_program_gamma_superfine_segment(const struct intel_crtc_state *crtc_state) u32 i; /* - * Every entry in the multi-segment LUT is corresponding to a superfine - * segment step which is 1/(8 * 128 * 256). + * Program Super Fine segment (let's call it seg1)... * - * Superfine segment has 9 entries, corresponding to values - * 0, 1/(8 * 128 * 256), 2/(8 * 128 * 256) .... 8/(8 * 128 * 256). + * Super Fine segment's step is 1/(8 * 128 * 256) and it has + * 9 entries, corresponding to values 0, 1/(8 * 128 * 256), + * 2/(8 * 128 * 256) ... 8/(8 * 128 * 256). */ I915_WRITE(PREC_PAL_MULTI_SEG_INDEX(pipe), PAL_PREC_AUTO_INCREMENT); @@ -853,17 +853,17 @@ icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state) u32 i; /* - * * Program Fine segment (let's call it seg2)... * - * Fine segment's step is 1/(128 * 256) ie 1/(128 * 256), 2/(128*256) - * ... 256/(128*256). So in order to program fine segment of LUT we - * need to pick every 8'th entry in LUT, and program 256 indexes. + * Fine segment's step is 1/(128 * 256) i.e. 1/(128 * 256), 2/(128 * 256) + * ... 256/(128 * 256). So in order to program fine segment of LUT we + * need to pick every 8th entry in the LUT, and program 256 indexes. * * PAL_PREC_INDEX[0] and PAL_PREC_INDEX[1] map to seg2[1], - * with seg2[0] being unused by the hardware. + * seg2[0] being unused by the hardware. */ I915_WRITE(PREC_PAL_INDEX(pipe), PAL_PREC_AUTO_INCREMENT); + for (i = 1; i < 257; i++) { entry = &lut[i * 8]; I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry)); @@ -873,8 +873,8 @@ icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state) /* * Program Coarse segment (let's call it seg3)... * - * Coarse segment's starts from index 0 and it's step is 1/256 ie 0, - * 1/256, 2/256 ...256/256. As per the description of each entry in LUT + * Coarse segment starts from index 0 and it's step is 1/256 ie 0, + * 1/256, 2/256 ... 256/256. As per the description of each entry in LUT * above, we need to pick every (8 * 128)th entry in LUT, and * program 256 of those. * @@ -906,12 +906,10 @@ static void icl_load_luts(const struct intel_crtc_state *crtc_state) case GAMMA_MODE_MODE_8BIT: i9xx_load_luts(crtc_state); break; - case GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED: icl_program_gamma_superfine_segment(crtc_state); icl_program_gamma_multi_segment(crtc_state); break; - default: bdw_load_lut_10(crtc, gamma_lut, PAL_PREC_INDEX_VALUE(0)); ivb_load_lut_ext_max(crtc); @@ -1743,9 +1741,6 @@ glk_read_lut_10(const struct intel_crtc_state *crtc_state, u32 prec_index) struct drm_color_lut *blob_data; u32 i, val; - I915_WRITE(PREC_PAL_INDEX(pipe), prec_index | - PAL_PREC_AUTO_INCREMENT); - blob = drm_property_create_blob(&dev_priv->drm, sizeof(struct drm_color_lut) * hw_lut_size, NULL); @@ -1754,6 +1749,9 @@ glk_read_lut_10(const struct intel_crtc_state *crtc_state, u32 prec_index) blob_data = blob->data; + I915_WRITE(PREC_PAL_INDEX(pipe), prec_index | + PAL_PREC_AUTO_INCREMENT); + for (i = 0; i < hw_lut_size; i++) { val = I915_READ(PREC_PAL_DATA(pipe)); @@ -1819,16 +1817,16 @@ void intel_color_init(struct intel_crtc *crtc) else dev_priv->display.color_commit = ilk_color_commit; - if (INTEL_GEN(dev_priv) >= 11) + if (INTEL_GEN(dev_priv) >= 11) { dev_priv->display.load_luts = icl_load_luts; - else if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) { + } else if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) { dev_priv->display.load_luts = glk_load_luts; dev_priv->display.read_luts = glk_read_luts; - } else if (INTEL_GEN(dev_priv) >= 8) + } else if (INTEL_GEN(dev_priv) >= 8) { dev_priv->display.load_luts = bdw_load_luts; - else if (INTEL_GEN(dev_priv) >= 7) + } else if (INTEL_GEN(dev_priv) >= 7) { dev_priv->display.load_luts = ivb_load_luts; - else { + } else { dev_priv->display.load_luts = ilk_load_luts; dev_priv->display.read_luts = ilk_read_luts; } From 84af7649188194a74cdd6437235a5e3c86108f0f Mon Sep 17 00:00:00 2001 From: Swati Sharma Date: Sun, 22 Sep 2019 01:40:53 +0530 Subject: [PATCH 210/331] drm/i915/color: Extract icl_read_luts() For icl+, have hw read out to create hw blob of gamma lut values. icl+ platforms supports multi segmented gamma mode by default, add hw lut creation for this mode. This will be used to validate gamma programming using dsb (display state buffer) which is a tgl specific feature. Major change done-removal of readouts of coarse and fine segments because PAL_PREC_DATA register isn't giving propoer values. State checker limited only to "fine segment" v2: -readout code for multisegmented gamma has to come up with some intermediate entries that aren't preserved in hardware (Jani N) -linear interpolation (Ville) -moved common code to check gamma_enable to specific funcs, since icl doesn't support that v3: -use u16 instead of __u16 [Jani N] -used single lut [Jani N] -improved and more readable for loops [Jani N] -read values directly to actual locations and then fill gaps [Jani N] -moved cleaning to patch 1 [Jani N] -renamed icl_read_lut_multi_seg() to icl_read_lut_multi_segment to make it similar to icl_load_luts() -renamed icl_compute_interpolated_gamma_blob() to icl_compute_interpolated_gamma_lut_values() more sensible, I guess v4: -removed interpolated func for creating gamma lut values -removed readouts of fine and coarse segments, failure to read PAL_PREC_DATA correctly Signed-off-by: Swati Sharma Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1569096654-24433-3-git-send-email-swati2.sharma@intel.com --- drivers/gpu/drm/i915/display/intel_color.c | 126 ++++++++++++++++++--- drivers/gpu/drm/i915/i915_reg.h | 6 + 2 files changed, 117 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index 91435ba426d01..b0268a9bde8a7 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -1402,6 +1402,9 @@ static int icl_color_check(struct intel_crtc_state *crtc_state) static int i9xx_gamma_precision(const struct intel_crtc_state *crtc_state) { + if (!crtc_state->gamma_enable) + return 0; + switch (crtc_state->gamma_mode) { case GAMMA_MODE_MODE_8BIT: return 8; @@ -1415,6 +1418,9 @@ static int i9xx_gamma_precision(const struct intel_crtc_state *crtc_state) static int ilk_gamma_precision(const struct intel_crtc_state *crtc_state) { + if (!crtc_state->gamma_enable) + return 0; + if ((crtc_state->csc_mode & CSC_POSITION_BEFORE_GAMMA) == 0) return 0; @@ -1431,6 +1437,9 @@ static int ilk_gamma_precision(const struct intel_crtc_state *crtc_state) static int chv_gamma_precision(const struct intel_crtc_state *crtc_state) { + if (!crtc_state->gamma_enable) + return 0; + if (crtc_state->cgm_mode & CGM_PIPE_MODE_GAMMA) return 10; else @@ -1439,6 +1448,9 @@ static int chv_gamma_precision(const struct intel_crtc_state *crtc_state) static int glk_gamma_precision(const struct intel_crtc_state *crtc_state) { + if (!crtc_state->gamma_enable) + return 0; + switch (crtc_state->gamma_mode) { case GAMMA_MODE_MODE_8BIT: return 8; @@ -1450,21 +1462,39 @@ static int glk_gamma_precision(const struct intel_crtc_state *crtc_state) } } +static int icl_gamma_precision(const struct intel_crtc_state *crtc_state) +{ + if ((crtc_state->gamma_mode & POST_CSC_GAMMA_ENABLE) == 0) + return 0; + + switch (crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) { + case GAMMA_MODE_MODE_8BIT: + return 8; + case GAMMA_MODE_MODE_10BIT: + return 10; + case GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED: + return 16; + default: + MISSING_CASE(crtc_state->gamma_mode); + return 0; + } + +} + int intel_color_get_gamma_bit_precision(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - if (!crtc_state->gamma_enable) - return 0; - if (HAS_GMCH(dev_priv)) { if (IS_CHERRYVIEW(dev_priv)) return chv_gamma_precision(crtc_state); else return i9xx_gamma_precision(crtc_state); } else { - if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) + if (INTEL_GEN(dev_priv) >= 11) + return icl_gamma_precision(crtc_state); + else if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) return glk_gamma_precision(crtc_state); else if (IS_IRONLAKE(dev_priv)) return ilk_gamma_precision(crtc_state); @@ -1495,6 +1525,20 @@ static bool intel_color_lut_entry_equal(struct drm_color_lut *lut1, return true; } +static bool intel_color_lut_entry_multi_equal(struct drm_color_lut *lut1, + struct drm_color_lut *lut2, + int lut_size, u32 err) +{ + int i; + + for (i = 0; i < 9; i++) { + if (!err_check(&lut1[i], &lut2[i], err)) + return false; + } + + return true; +} + bool intel_color_lut_equal(struct drm_property_blob *blob1, struct drm_property_blob *blob2, u32 gamma_mode, u32 bit_precision) @@ -1513,16 +1557,8 @@ bool intel_color_lut_equal(struct drm_property_blob *blob1, lut_size2 = drm_color_lut_size(blob2); /* check sw and hw lut size */ - switch (gamma_mode) { - case GAMMA_MODE_MODE_8BIT: - case GAMMA_MODE_MODE_10BIT: - if (lut_size1 != lut_size2) - return false; - break; - default: - MISSING_CASE(gamma_mode); - return false; - } + if (lut_size1 != lut_size2) + return false; lut1 = blob1->data; lut2 = blob2->data; @@ -1530,13 +1566,18 @@ bool intel_color_lut_equal(struct drm_property_blob *blob1, err = 0xffff >> bit_precision; /* check sw and hw lut entry to be equal */ - switch (gamma_mode) { + switch (gamma_mode & GAMMA_MODE_MODE_MASK) { case GAMMA_MODE_MODE_8BIT: case GAMMA_MODE_MODE_10BIT: if (!intel_color_lut_entry_equal(lut1, lut2, lut_size2, err)) return false; break; + case GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED: + if (!intel_color_lut_entry_multi_equal(lut1, lut2, + lut_size2, err)) + return false; + break; default: MISSING_CASE(gamma_mode); return false; @@ -1776,6 +1817,60 @@ static void glk_read_luts(struct intel_crtc_state *crtc_state) crtc_state->base.gamma_lut = glk_read_lut_10(crtc_state, PAL_PREC_INDEX_VALUE(0)); } +static struct drm_property_blob * +icl_read_lut_multi_segment(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + int lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; + enum pipe pipe = crtc->pipe; + struct drm_property_blob *blob; + struct drm_color_lut *blob_data; + u32 i, val1, val2; + + blob = drm_property_create_blob(&dev_priv->drm, + sizeof(struct drm_color_lut) * lut_size, + NULL); + if (IS_ERR(blob)) + return NULL; + + blob_data = blob->data; + + I915_WRITE(PREC_PAL_MULTI_SEG_INDEX(pipe), PAL_PREC_AUTO_INCREMENT); + + for (i = 0; i < 9; i++) { + val1 = I915_READ(PREC_PAL_MULTI_SEG_DATA(pipe)); + val2 = I915_READ(PREC_PAL_MULTI_SEG_DATA(pipe)); + + blob_data[i].red = REG_FIELD_GET(PAL_PREC_MULTI_SEG_RED_UDW_MASK, val2) << 6 | + REG_FIELD_GET(PAL_PREC_MULTI_SEG_RED_LDW_MASK, val1); + blob_data[i].green = REG_FIELD_GET(PAL_PREC_MULTI_SEG_GREEN_UDW_MASK, val2) << 6 | + REG_FIELD_GET(PAL_PREC_MULTI_SEG_GREEN_LDW_MASK, val1); + blob_data[i].blue = REG_FIELD_GET(PAL_PREC_MULTI_SEG_BLUE_UDW_MASK, val2) << 6 | + REG_FIELD_GET(PAL_PREC_MULTI_SEG_BLUE_LDW_MASK, val1); + } + + /* + * FIXME readouts from PAL_PREC_DATA register aren't giving correct values + * in the case of fine and coarse segments. Restricting readouts only for + * super fine segment as of now. + */ + + return blob; +} + +static void icl_read_luts(struct intel_crtc_state *crtc_state) +{ + if ((crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) == + GAMMA_MODE_MODE_8BIT) + crtc_state->base.gamma_lut = i9xx_read_lut_8(crtc_state); + else if ((crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) == + GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED) + crtc_state->base.gamma_lut = icl_read_lut_multi_segment(crtc_state); + else + crtc_state->base.gamma_lut = glk_read_lut_10(crtc_state, PAL_PREC_INDEX_VALUE(0)); +} + void intel_color_init(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); @@ -1819,6 +1914,7 @@ void intel_color_init(struct intel_crtc *crtc) if (INTEL_GEN(dev_priv) >= 11) { dev_priv->display.load_luts = icl_load_luts; + dev_priv->display.read_luts = icl_read_luts; } else if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) { dev_priv->display.load_luts = glk_load_luts; dev_priv->display.read_luts = glk_read_luts; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 7c64768976ac6..9eb71a547103a 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -10410,6 +10410,12 @@ enum skl_power_gate { #define _PAL_PREC_MULTI_SEG_DATA_A 0x4A40C #define _PAL_PREC_MULTI_SEG_DATA_B 0x4AC0C +#define PAL_PREC_MULTI_SEG_RED_LDW_MASK REG_GENMASK(29, 24) +#define PAL_PREC_MULTI_SEG_RED_UDW_MASK REG_GENMASK(29, 20) +#define PAL_PREC_MULTI_SEG_GREEN_LDW_MASK REG_GENMASK(19, 14) +#define PAL_PREC_MULTI_SEG_GREEN_UDW_MASK REG_GENMASK(19, 10) +#define PAL_PREC_MULTI_SEG_BLUE_LDW_MASK REG_GENMASK(9, 4) +#define PAL_PREC_MULTI_SEG_BLUE_UDW_MASK REG_GENMASK(9, 0) #define PREC_PAL_MULTI_SEG_INDEX(pipe) _MMIO_PIPE(pipe, \ _PAL_PREC_MULTI_SEG_INDEX_A, \ From 18febcb74ed37abd835da0ab5335df67ed40e4b5 Mon Sep 17 00:00:00 2001 From: Animesh Manna Date: Fri, 20 Sep 2019 17:29:21 +0530 Subject: [PATCH 211/331] drm/i915/dsb: feature flag added for display state buffer. Display State Buffer(DSB) is a new hardware capability, introduced in GEN12 display. DSB allows a driver to batch-program display HW registers. Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Shashank Sharma Reviewed-by: Shashank Sharma Signed-off-by: Animesh Manna Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190920115930.27829-2-animesh.manna@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/intel_device_info.h | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 4faec2f94e19e..84b9b138d7acf 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1863,6 +1863,8 @@ static inline struct drm_i915_private *pdev_to_i915(struct pci_dev *pdev) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(n)) + \ INTEL_INFO(dev_priv)->gen == (n)) +#define HAS_DSB(dev_priv) (INTEL_INFO(dev_priv)->display.has_dsb) + /* * Return true if revision is in range [since,until] inclusive. * diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index d4c288860aed2..0cdc2465534b5 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -135,6 +135,7 @@ enum intel_ppgtt_type { func(has_csr); \ func(has_ddi); \ func(has_dp_mst); \ + func(has_dsb); \ func(has_fbc); \ func(has_gmch); \ func(has_hotplug); \ From 67f3b58f3bac975f35c312fd8876edb599cc24be Mon Sep 17 00:00:00 2001 From: Animesh Manna Date: Fri, 20 Sep 2019 17:29:22 +0530 Subject: [PATCH 212/331] drm/i915/dsb: DSB context creation. This patch adds a function, which will internally get the gem buffer for DSB engine. The GEM buffer is from global GTT, and is mapped into CPU domain, contains the data + opcode to be feed to DSB engine. v1: Initial version. v2: - removed some unwanted code. (Chris) - Used i915_gem_object_create_internal instead of _shmem. (Chris) - cmd_buf_tail removed and can be derived through vma object. (Chris) v3: vma realeased if i915_gem_object_pin_map() failed. (Shashank) v4: for simplification and based on current usage added single dsb object in intel_crtc. (Shashank) v5: seting NULL to cmd_buf moved outside of mutex in dsb-put(). (Shashank) v6: - refcount machanism added. - Used atomic_add_return and atomic_dec_and_test instead of atomic_inc and atomic_dec. (Jani) Cc: Imre Deak Cc: Michel Thierry Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Shashank Sharma Signed-off-by: Animesh Manna [Jani: added #include while pushing] Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190920115930.27829-3-animesh.manna@intel.com --- drivers/gpu/drm/i915/Makefile | 1 + .../drm/i915/display/intel_display_types.h | 3 + drivers/gpu/drm/i915/display/intel_dsb.c | 80 +++++++++++++++++++ drivers/gpu/drm/i915/display/intel_dsb.h | 33 ++++++++ drivers/gpu/drm/i915/i915_drv.h | 1 + 5 files changed, 118 insertions(+) create mode 100644 drivers/gpu/drm/i915/display/intel_dsb.c create mode 100644 drivers/gpu/drm/i915/display/intel_dsb.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 658b930d34a81..6313e7b4bd784 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -172,6 +172,7 @@ i915-y += \ display/intel_display_power.o \ display/intel_dpio_phy.o \ display/intel_dpll_mgr.o \ + display/intel_dsb.o \ display/intel_fbc.o \ display/intel_fifo_underrun.o \ display/intel_frontbuffer.o \ diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index d5cc4b810d9ef..49c902b00484c 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -1033,6 +1033,9 @@ struct intel_crtc { /* scalers available on this crtc */ int num_scalers; + + /* per pipe DSB related info */ + struct intel_dsb dsb; }; struct intel_plane { diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c new file mode 100644 index 0000000000000..2ed277670f15f --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + * + */ + +#include "i915_drv.h" +#include "intel_display_types.h" + +#define DSB_BUF_SIZE (2 * PAGE_SIZE) + +struct intel_dsb * +intel_dsb_get(struct intel_crtc *crtc) +{ + struct drm_device *dev = crtc->base.dev; + struct drm_i915_private *i915 = to_i915(dev); + struct intel_dsb *dsb = &crtc->dsb; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + intel_wakeref_t wakeref; + + if (!HAS_DSB(i915)) + return dsb; + + if (atomic_add_return(1, &dsb->refcount) != 1) + return dsb; + + dsb->id = DSB1; + wakeref = intel_runtime_pm_get(&i915->runtime_pm); + + obj = i915_gem_object_create_internal(i915, DSB_BUF_SIZE); + if (IS_ERR(obj)) { + DRM_ERROR("Gem object creation failed\n"); + goto err; + } + + mutex_lock(&i915->drm.struct_mutex); + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); + mutex_unlock(&i915->drm.struct_mutex); + if (IS_ERR(vma)) { + DRM_ERROR("Vma creation failed\n"); + i915_gem_object_put(obj); + atomic_dec(&dsb->refcount); + goto err; + } + + dsb->cmd_buf = i915_gem_object_pin_map(vma->obj, I915_MAP_WC); + if (IS_ERR(dsb->cmd_buf)) { + DRM_ERROR("Command buffer creation failed\n"); + i915_vma_unpin_and_release(&vma, 0); + dsb->cmd_buf = NULL; + atomic_dec(&dsb->refcount); + goto err; + } + dsb->vma = vma; + +err: + intel_runtime_pm_put(&i915->runtime_pm, wakeref); + return dsb; +} + +void intel_dsb_put(struct intel_dsb *dsb) +{ + struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb); + struct drm_i915_private *i915 = to_i915(crtc->base.dev); + + if (!HAS_DSB(i915)) + return; + + if (WARN_ON(atomic_read(&dsb->refcount) == 0)) + return; + + if (atomic_dec_and_test(&dsb->refcount)) { + mutex_lock(&i915->drm.struct_mutex); + i915_gem_object_unpin_map(dsb->vma->obj); + i915_vma_unpin_and_release(&dsb->vma, 0); + mutex_unlock(&i915->drm.struct_mutex); + dsb->cmd_buf = NULL; + } +} diff --git a/drivers/gpu/drm/i915/display/intel_dsb.h b/drivers/gpu/drm/i915/display/intel_dsb.h new file mode 100644 index 0000000000000..b8639864df506 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_dsb.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef _INTEL_DSB_H +#define _INTEL_DSB_H + +#include + +struct intel_crtc; +struct i915_vma; + +enum dsb_id { + INVALID_DSB = -1, + DSB1, + DSB2, + DSB3, + MAX_DSB_PER_PIPE +}; + +struct intel_dsb { + atomic_t refcount; + enum dsb_id id; + u32 *cmd_buf; + struct i915_vma *vma; +}; + +struct intel_dsb * +intel_dsb_get(struct intel_crtc *crtc); +void intel_dsb_put(struct intel_dsb *dsb); + +#endif diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 84b9b138d7acf..07f1e89a55ca0 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -67,6 +67,7 @@ #include "display/intel_display.h" #include "display/intel_display_power.h" #include "display/intel_dpll_mgr.h" +#include "display/intel_dsb.h" #include "display/intel_frontbuffer.h" #include "display/intel_gmbus.h" #include "display/intel_opregion.h" From 061489c65ff57ee9f757d7a519fb9a09e5fbadd6 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 23 Sep 2019 10:09:23 +0300 Subject: [PATCH 213/331] drm/i915/dsb: single register write function for DSB. DSB support single register write through opcode 0x1. Generic api created which accumulate all single register write in a batch buffer and once DSB is triggered, it will program all the registers at the same time. v1: Initial version. v2: Unused macro removed and cosmetic changes done. (Shashank) v3: set free_pos to zero in dsb-put() instead dsb-get() and a cosmetic change. (Shashank) v4: macro of indexed-write is moved. (Shashank) Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Shashank Sharma Reviewed-by: Shashank Sharma Signed-off-by: Animesh Manna Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190920115930.27829-4-animesh.manna@intel.com --- drivers/gpu/drm/i915/display/intel_dsb.c | 29 ++++++++++++++++++++++++ drivers/gpu/drm/i915/display/intel_dsb.h | 9 ++++++++ 2 files changed, 38 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index 2ed277670f15f..f94cd6dc98b6d 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -9,6 +9,12 @@ #define DSB_BUF_SIZE (2 * PAGE_SIZE) +/* DSB opcodes. */ +#define DSB_OPCODE_SHIFT 24 +#define DSB_OPCODE_MMIO_WRITE 0x1 +#define DSB_BYTE_EN 0xF +#define DSB_BYTE_EN_SHIFT 20 + struct intel_dsb * intel_dsb_get(struct intel_crtc *crtc) { @@ -76,5 +82,28 @@ void intel_dsb_put(struct intel_dsb *dsb) i915_vma_unpin_and_release(&dsb->vma, 0); mutex_unlock(&i915->drm.struct_mutex); dsb->cmd_buf = NULL; + dsb->free_pos = 0; + } +} + +void intel_dsb_reg_write(struct intel_dsb *dsb, i915_reg_t reg, u32 val) +{ + struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + u32 *buf = dsb->cmd_buf; + + if (!buf) { + I915_WRITE(reg, val); + return; + } + + if (WARN_ON(dsb->free_pos >= DSB_BUF_SIZE)) { + DRM_DEBUG_KMS("DSB buffer overflow\n"); + return; } + + buf[dsb->free_pos++] = val; + buf[dsb->free_pos++] = (DSB_OPCODE_MMIO_WRITE << DSB_OPCODE_SHIFT) | + (DSB_BYTE_EN << DSB_BYTE_EN_SHIFT) | + i915_mmio_reg_offset(reg); } diff --git a/drivers/gpu/drm/i915/display/intel_dsb.h b/drivers/gpu/drm/i915/display/intel_dsb.h index b8639864df506..839cbf621fa27 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.h +++ b/drivers/gpu/drm/i915/display/intel_dsb.h @@ -8,6 +8,8 @@ #include +#include "i915_reg.h" + struct intel_crtc; struct i915_vma; @@ -24,10 +26,17 @@ struct intel_dsb { enum dsb_id id; u32 *cmd_buf; struct i915_vma *vma; + + /* + * free_pos will point the first free entry position + * and help in calculating tail of command buffer. + */ + int free_pos; }; struct intel_dsb * intel_dsb_get(struct intel_crtc *crtc); void intel_dsb_put(struct intel_dsb *dsb); +void intel_dsb_reg_write(struct intel_dsb *dsb, i915_reg_t reg, u32 val); #endif From b27a96ad72fd706adc5dbbfd7bba76f698fe3875 Mon Sep 17 00:00:00 2001 From: Animesh Manna Date: Fri, 20 Sep 2019 17:29:24 +0530 Subject: [PATCH 214/331] drm/i915/dsb: Indexed register write function for DSB. DSB can program large set of data through indexed register write (opcode 0x9) in one shot. DSB feature can be used for bulk register programming e.g. gamma lut programming, HDR meta data programming. v1: initial version. v2: simplified code by using ALIGN(). (Chris) v3: ascii table added as code comment. (Shashank) v4: cosmetic changes done. (Shashank) v5: reset ins_start_offset. (Jani) v6: update ins_start_offset in inel_dsb_reg_write. Cc: Shashank Sharma Cc: Imre Deak Cc: Jani Nikula Cc: Rodrigo Vivi Reviewed-by: Shashank Sharma Signed-off-by: Animesh Manna Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190920115930.27829-5-animesh.manna@intel.com --- drivers/gpu/drm/i915/display/intel_dsb.c | 68 ++++++++++++++++++++++++ drivers/gpu/drm/i915/display/intel_dsb.h | 9 ++++ 2 files changed, 77 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index f94cd6dc98b6d..faa853b084586 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -12,8 +12,10 @@ /* DSB opcodes. */ #define DSB_OPCODE_SHIFT 24 #define DSB_OPCODE_MMIO_WRITE 0x1 +#define DSB_OPCODE_INDEXED_WRITE 0x9 #define DSB_BYTE_EN 0xF #define DSB_BYTE_EN_SHIFT 20 +#define DSB_REG_VALUE_MASK 0xfffff struct intel_dsb * intel_dsb_get(struct intel_crtc *crtc) @@ -83,9 +85,74 @@ void intel_dsb_put(struct intel_dsb *dsb) mutex_unlock(&i915->drm.struct_mutex); dsb->cmd_buf = NULL; dsb->free_pos = 0; + dsb->ins_start_offset = 0; } } +void intel_dsb_indexed_reg_write(struct intel_dsb *dsb, i915_reg_t reg, + u32 val) +{ + struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + u32 *buf = dsb->cmd_buf; + u32 reg_val; + + if (!buf) { + I915_WRITE(reg, val); + return; + } + + if (WARN_ON(dsb->free_pos >= DSB_BUF_SIZE)) { + DRM_DEBUG_KMS("DSB buffer overflow\n"); + return; + } + + /* + * For example the buffer will look like below for 3 dwords for auto + * increment register: + * +--------------------------------------------------------+ + * | size = 3 | offset &| value1 | value2 | value3 | zero | + * | | opcode | | | | | + * +--------------------------------------------------------+ + * + + + + + + + + * 0 4 8 12 16 20 24 + * Byte + * + * As every instruction is 8 byte aligned the index of dsb instruction + * will start always from even number while dealing with u32 array. If + * we are writing odd no of dwords, Zeros will be added in the end for + * padding. + */ + reg_val = buf[dsb->ins_start_offset + 1] & DSB_REG_VALUE_MASK; + if (reg_val != i915_mmio_reg_offset(reg)) { + /* Every instruction should be 8 byte aligned. */ + dsb->free_pos = ALIGN(dsb->free_pos, 2); + + dsb->ins_start_offset = dsb->free_pos; + + /* Update the size. */ + buf[dsb->free_pos++] = 1; + + /* Update the opcode and reg. */ + buf[dsb->free_pos++] = (DSB_OPCODE_INDEXED_WRITE << + DSB_OPCODE_SHIFT) | + i915_mmio_reg_offset(reg); + + /* Update the value. */ + buf[dsb->free_pos++] = val; + } else { + /* Update the new value. */ + buf[dsb->free_pos++] = val; + + /* Update the size. */ + buf[dsb->ins_start_offset]++; + } + + /* if number of data words is odd, then the last dword should be 0.*/ + if (dsb->free_pos & 0x1) + buf[dsb->free_pos] = 0; +} + void intel_dsb_reg_write(struct intel_dsb *dsb, i915_reg_t reg, u32 val) { struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb); @@ -102,6 +169,7 @@ void intel_dsb_reg_write(struct intel_dsb *dsb, i915_reg_t reg, u32 val) return; } + dsb->ins_start_offset = dsb->free_pos; buf[dsb->free_pos++] = val; buf[dsb->free_pos++] = (DSB_OPCODE_MMIO_WRITE << DSB_OPCODE_SHIFT) | (DSB_BYTE_EN << DSB_BYTE_EN_SHIFT) | diff --git a/drivers/gpu/drm/i915/display/intel_dsb.h b/drivers/gpu/drm/i915/display/intel_dsb.h index 839cbf621fa27..e4dd0ab799ec6 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.h +++ b/drivers/gpu/drm/i915/display/intel_dsb.h @@ -32,11 +32,20 @@ struct intel_dsb { * and help in calculating tail of command buffer. */ int free_pos; + + /* + * ins_start_offset will help to store start address of the dsb + * instuction and help in identifying the batch of auto-increment + * register. + */ + u32 ins_start_offset; }; struct intel_dsb * intel_dsb_get(struct intel_crtc *crtc); void intel_dsb_put(struct intel_dsb *dsb); void intel_dsb_reg_write(struct intel_dsb *dsb, i915_reg_t reg, u32 val); +void intel_dsb_indexed_reg_write(struct intel_dsb *dsb, i915_reg_t reg, + u32 val); #endif From a6e58d9a2e045e800ac54b838c05656f982c36fe Mon Sep 17 00:00:00 2001 From: Animesh Manna Date: Fri, 20 Sep 2019 17:29:25 +0530 Subject: [PATCH 215/331] drm/i915/dsb: Check DSB engine status. As per bspec check for DSB status before programming any of its register. Inline function added to check the dsb status. Cc: Michel Thierry Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Shashank Sharma Reviewed-by: Shashank Sharma Signed-off-by: Animesh Manna Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190920115930.27829-6-animesh.manna@intel.com --- drivers/gpu/drm/i915/display/intel_dsb.c | 9 +++++++++ drivers/gpu/drm/i915/i915_reg.h | 7 +++++++ 2 files changed, 16 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index faa853b084586..6000050b18a67 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -17,6 +17,15 @@ #define DSB_BYTE_EN_SHIFT 20 #define DSB_REG_VALUE_MASK 0xfffff +static inline bool is_dsb_busy(struct intel_dsb *dsb) +{ + struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; + + return DSB_STATUS & I915_READ(DSB_CTRL(pipe, dsb->id)); +} + struct intel_dsb * intel_dsb_get(struct intel_crtc *crtc) { diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 9eb71a547103a..fba3e8b262965 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -11698,4 +11698,11 @@ enum skl_power_gate { #define PORT_TX_DFLEXDPCSSS(fia) _MMIO_FIA((fia), 0x00894) #define DP_PHY_MODE_STATUS_NOT_SAFE(tc_port) (1 << (tc_port)) +/* This register controls the Display State Buffer (DSB) engines. */ +#define _DSBSL_INSTANCE_BASE 0x70B00 +#define DSBSL_INSTANCE(pipe, id) (_DSBSL_INSTANCE_BASE + \ + (pipe) * 0x1000 + (id) * 100) +#define DSB_CTRL(pipe, id) _MMIO(DSBSL_INSTANCE(pipe, id) + 0x8) +#define DSB_STATUS (1 << 0) + #endif /* _I915_REG_H_ */ From f7619c47983431fc5ddfa30d8f921a6a40e0223a Mon Sep 17 00:00:00 2001 From: Animesh Manna Date: Fri, 20 Sep 2019 17:29:26 +0530 Subject: [PATCH 216/331] drm/i915/dsb: functions to enable/disable DSB engine. DSB will be used for performance improvement for some special scenario. DSB engine will be enabled based on need and after completion of its work will be disabled. Api added for enable/disable operation by using DSB_CTRL register. v1: Initial version. v2: POSTING_READ added after writing control register. (Shashank) v3: cosmetic changes done. (Shashank) Cc: Michel Thierry Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Shashank Sharma Reviewed-by: Shashank Sharma Signed-off-by: Animesh Manna Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190920115930.27829-7-animesh.manna@intel.com --- drivers/gpu/drm/i915/display/intel_dsb.c | 40 ++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_reg.h | 1 + 2 files changed, 41 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index 6000050b18a67..6fb4529689f18 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -26,6 +26,46 @@ static inline bool is_dsb_busy(struct intel_dsb *dsb) return DSB_STATUS & I915_READ(DSB_CTRL(pipe, dsb->id)); } +static inline bool intel_dsb_enable_engine(struct intel_dsb *dsb) +{ + struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; + u32 dsb_ctrl; + + dsb_ctrl = I915_READ(DSB_CTRL(pipe, dsb->id)); + if (DSB_STATUS & dsb_ctrl) { + DRM_DEBUG_KMS("DSB engine is busy.\n"); + return false; + } + + dsb_ctrl |= DSB_ENABLE; + I915_WRITE(DSB_CTRL(pipe, dsb->id), dsb_ctrl); + + POSTING_READ(DSB_CTRL(pipe, dsb->id)); + return true; +} + +static inline bool intel_dsb_disable_engine(struct intel_dsb *dsb) +{ + struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; + u32 dsb_ctrl; + + dsb_ctrl = I915_READ(DSB_CTRL(pipe, dsb->id)); + if (DSB_STATUS & dsb_ctrl) { + DRM_DEBUG_KMS("DSB engine is busy.\n"); + return false; + } + + dsb_ctrl &= ~DSB_ENABLE; + I915_WRITE(DSB_CTRL(pipe, dsb->id), dsb_ctrl); + + POSTING_READ(DSB_CTRL(pipe, dsb->id)); + return true; +} + struct intel_dsb * intel_dsb_get(struct intel_crtc *crtc) { diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index fba3e8b262965..5fcbaaa8f3c61 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -11703,6 +11703,7 @@ enum skl_power_gate { #define DSBSL_INSTANCE(pipe, id) (_DSBSL_INSTANCE_BASE + \ (pipe) * 0x1000 + (id) * 100) #define DSB_CTRL(pipe, id) _MMIO(DSBSL_INSTANCE(pipe, id) + 0x8) +#define DSB_ENABLE (1 << 31) #define DSB_STATUS (1 << 0) #endif /* _I915_REG_H_ */ From 1abf329a713d2772257470873d57794d2404018d Mon Sep 17 00:00:00 2001 From: Animesh Manna Date: Fri, 20 Sep 2019 17:29:27 +0530 Subject: [PATCH 217/331] drm/i915/dsb: function to trigger workload execution of DSB. Batch buffer will be created through dsb-reg-write function which can have single/multiple request based on usecase and once the buffer is ready commit function will trigger the execution of the batch buffer. All the registers will be updated simultaneously. v1: Initial version. v2: Optimized code few places. (Chris) v3: USed DRM_ERROR for dsb head/tail programming failure. (Shashank) v4: reset ins_start_offset after commit. (Jani) Cc: Imre Deak Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Shashank Sharma Reviewed-by: Shashank Sharma Signed-off-by: Animesh Manna Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190920115930.27829-8-animesh.manna@intel.com --- drivers/gpu/drm/i915/display/intel_dsb.c | 43 ++++++++++++++++++++++++ drivers/gpu/drm/i915/display/intel_dsb.h | 1 + drivers/gpu/drm/i915/i915_reg.h | 2 ++ 3 files changed, 46 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index 6fb4529689f18..f4c0b37683a51 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -224,3 +224,46 @@ void intel_dsb_reg_write(struct intel_dsb *dsb, i915_reg_t reg, u32 val) (DSB_BYTE_EN << DSB_BYTE_EN_SHIFT) | i915_mmio_reg_offset(reg); } + +void intel_dsb_commit(struct intel_dsb *dsb) +{ + struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb); + struct drm_device *dev = crtc->base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); + enum pipe pipe = crtc->pipe; + u32 tail; + + if (!dsb->free_pos) + return; + + if (!intel_dsb_enable_engine(dsb)) + goto reset; + + if (is_dsb_busy(dsb)) { + DRM_ERROR("HEAD_PTR write failed - dsb engine is busy.\n"); + goto reset; + } + I915_WRITE(DSB_HEAD(pipe, dsb->id), i915_ggtt_offset(dsb->vma)); + + tail = ALIGN(dsb->free_pos * 4, CACHELINE_BYTES); + if (tail > dsb->free_pos * 4) + memset(&dsb->cmd_buf[dsb->free_pos], 0, + (tail - dsb->free_pos * 4)); + + if (is_dsb_busy(dsb)) { + DRM_ERROR("TAIL_PTR write failed - dsb engine is busy.\n"); + goto reset; + } + DRM_DEBUG_KMS("DSB execution started - head 0x%x, tail 0x%x\n", + i915_ggtt_offset(dsb->vma), tail); + I915_WRITE(DSB_TAIL(pipe, dsb->id), i915_ggtt_offset(dsb->vma) + tail); + if (wait_for(!is_dsb_busy(dsb), 1)) { + DRM_ERROR("Timed out waiting for DSB workload completion.\n"); + goto reset; + } + +reset: + dsb->free_pos = 0; + dsb->ins_start_offset = 0; + intel_dsb_disable_engine(dsb); +} diff --git a/drivers/gpu/drm/i915/display/intel_dsb.h b/drivers/gpu/drm/i915/display/intel_dsb.h index e4dd0ab799ec6..6f95c8e909e64 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.h +++ b/drivers/gpu/drm/i915/display/intel_dsb.h @@ -47,5 +47,6 @@ void intel_dsb_put(struct intel_dsb *dsb); void intel_dsb_reg_write(struct intel_dsb *dsb, i915_reg_t reg, u32 val); void intel_dsb_indexed_reg_write(struct intel_dsb *dsb, i915_reg_t reg, u32 val); +void intel_dsb_commit(struct intel_dsb *dsb); #endif diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 5fcbaaa8f3c61..8649a30289631 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -11702,6 +11702,8 @@ enum skl_power_gate { #define _DSBSL_INSTANCE_BASE 0x70B00 #define DSBSL_INSTANCE(pipe, id) (_DSBSL_INSTANCE_BASE + \ (pipe) * 0x1000 + (id) * 100) +#define DSB_HEAD(pipe, id) _MMIO(DSBSL_INSTANCE(pipe, id) + 0x0) +#define DSB_TAIL(pipe, id) _MMIO(DSBSL_INSTANCE(pipe, id) + 0x4) #define DSB_CTRL(pipe, id) _MMIO(DSBSL_INSTANCE(pipe, id) + 0x8) #define DSB_ENABLE (1 << 31) #define DSB_STATUS (1 << 0) From 49e3fb7fd880f294643304f3f374d553dd6a4c11 Mon Sep 17 00:00:00 2001 From: Animesh Manna Date: Fri, 20 Sep 2019 17:29:28 +0530 Subject: [PATCH 218/331] drm/i915/dsb: Enable gamma lut programming using DSB. Gamma lut programming can be programmed using DSB where bulk register programming can be done using indexed register write which takes number of data and the mmio offset to be written. Currently enabled for 12-bit gamma LUT which is enabled by default and later 8-bit/10-bit will be enabled in future based on need. v1: Initial version. v2: Directly call dsb-api at callsites. (Jani) v3: - modified the code as per single dsb instance per crtc. (Shashank) - Added dsb get/put call in platform specific load_lut hook. (Jani) - removed dsb pointer from dev_priv. (Jani) v4: simplified code by dropping ref-count implementation. (Shashank) Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Shashank Sharma Signed-off-by: Animesh Manna Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20190920115930.27829-9-animesh.manna@intel.com --- drivers/gpu/drm/i915/display/intel_color.c | 64 ++++++++++++++-------- 1 file changed, 41 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index b0268a9bde8a7..402151128e1fd 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -627,12 +627,13 @@ static void bdw_load_lut_10(struct intel_crtc *crtc, static void ivb_load_lut_ext_max(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_dsb *dsb = intel_dsb_get(crtc); enum pipe pipe = crtc->pipe; /* Program the max register to clamp values > 1.0. */ - I915_WRITE(PREC_PAL_EXT_GC_MAX(pipe, 0), 1 << 16); - I915_WRITE(PREC_PAL_EXT_GC_MAX(pipe, 1), 1 << 16); - I915_WRITE(PREC_PAL_EXT_GC_MAX(pipe, 2), 1 << 16); + intel_dsb_reg_write(dsb, PREC_PAL_EXT_GC_MAX(pipe, 0), 1 << 16); + intel_dsb_reg_write(dsb, PREC_PAL_EXT_GC_MAX(pipe, 1), 1 << 16); + intel_dsb_reg_write(dsb, PREC_PAL_EXT_GC_MAX(pipe, 2), 1 << 16); /* * Program the gc max 2 register to clamp values > 1.0. @@ -640,10 +641,15 @@ static void ivb_load_lut_ext_max(struct intel_crtc *crtc) * from 3.0 to 7.0 */ if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) { - I915_WRITE(PREC_PAL_EXT2_GC_MAX(pipe, 0), 1 << 16); - I915_WRITE(PREC_PAL_EXT2_GC_MAX(pipe, 1), 1 << 16); - I915_WRITE(PREC_PAL_EXT2_GC_MAX(pipe, 2), 1 << 16); + intel_dsb_reg_write(dsb, PREC_PAL_EXT2_GC_MAX(pipe, 0), + 1 << 16); + intel_dsb_reg_write(dsb, PREC_PAL_EXT2_GC_MAX(pipe, 1), + 1 << 16); + intel_dsb_reg_write(dsb, PREC_PAL_EXT2_GC_MAX(pipe, 2), + 1 << 16); } + + intel_dsb_put(dsb); } static void ivb_load_luts(const struct intel_crtc_state *crtc_state) @@ -803,22 +809,23 @@ icl_load_gcmax(const struct intel_crtc_state *crtc_state, const struct drm_color_lut *color) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_dsb *dsb = intel_dsb_get(crtc); enum pipe pipe = crtc->pipe; /* Fixme: LUT entries are 16 bit only, so we can prog 0xFFFF max */ - I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), color->red); - I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), color->green); - I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), color->blue); + intel_dsb_reg_write(dsb, PREC_PAL_GC_MAX(pipe, 0), color->red); + intel_dsb_reg_write(dsb, PREC_PAL_GC_MAX(pipe, 1), color->green); + intel_dsb_reg_write(dsb, PREC_PAL_GC_MAX(pipe, 2), color->blue); + intel_dsb_put(dsb); } static void icl_program_gamma_superfine_segment(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); const struct drm_property_blob *blob = crtc_state->base.gamma_lut; const struct drm_color_lut *lut = blob->data; + struct intel_dsb *dsb = intel_dsb_get(crtc); enum pipe pipe = crtc->pipe; u32 i; @@ -829,26 +836,29 @@ icl_program_gamma_superfine_segment(const struct intel_crtc_state *crtc_state) * 9 entries, corresponding to values 0, 1/(8 * 128 * 256), * 2/(8 * 128 * 256) ... 8/(8 * 128 * 256). */ - I915_WRITE(PREC_PAL_MULTI_SEG_INDEX(pipe), PAL_PREC_AUTO_INCREMENT); + intel_dsb_reg_write(dsb, PREC_PAL_MULTI_SEG_INDEX(pipe), + PAL_PREC_AUTO_INCREMENT); for (i = 0; i < 9; i++) { const struct drm_color_lut *entry = &lut[i]; - I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe), - ilk_lut_12p4_ldw(entry)); - I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe), - ilk_lut_12p4_udw(entry)); + intel_dsb_indexed_reg_write(dsb, PREC_PAL_MULTI_SEG_DATA(pipe), + ilk_lut_12p4_ldw(entry)); + intel_dsb_indexed_reg_write(dsb, PREC_PAL_MULTI_SEG_DATA(pipe), + ilk_lut_12p4_udw(entry)); } + + intel_dsb_put(dsb); } static void icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); const struct drm_property_blob *blob = crtc_state->base.gamma_lut; const struct drm_color_lut *lut = blob->data; const struct drm_color_lut *entry; + struct intel_dsb *dsb = intel_dsb_get(crtc); enum pipe pipe = crtc->pipe; u32 i; @@ -862,12 +872,13 @@ icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state) * PAL_PREC_INDEX[0] and PAL_PREC_INDEX[1] map to seg2[1], * seg2[0] being unused by the hardware. */ - I915_WRITE(PREC_PAL_INDEX(pipe), PAL_PREC_AUTO_INCREMENT); - + intel_dsb_reg_write(dsb, PREC_PAL_INDEX(pipe), PAL_PREC_AUTO_INCREMENT); for (i = 1; i < 257; i++) { entry = &lut[i * 8]; - I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry)); - I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry)); + intel_dsb_indexed_reg_write(dsb, PREC_PAL_DATA(pipe), + ilk_lut_12p4_ldw(entry)); + intel_dsb_indexed_reg_write(dsb, PREC_PAL_DATA(pipe), + ilk_lut_12p4_udw(entry)); } /* @@ -884,20 +895,24 @@ icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state) */ for (i = 0; i < 256; i++) { entry = &lut[i * 8 * 128]; - I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry)); - I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry)); + intel_dsb_indexed_reg_write(dsb, PREC_PAL_DATA(pipe), + ilk_lut_12p4_ldw(entry)); + intel_dsb_indexed_reg_write(dsb, PREC_PAL_DATA(pipe), + ilk_lut_12p4_udw(entry)); } /* The last entry in the LUT is to be programmed in GCMAX */ entry = &lut[256 * 8 * 128]; icl_load_gcmax(crtc_state, entry); ivb_load_lut_ext_max(crtc); + intel_dsb_put(dsb); } static void icl_load_luts(const struct intel_crtc_state *crtc_state) { const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut; struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_dsb *dsb = intel_dsb_get(crtc); if (crtc_state->base.degamma_lut) glk_load_degamma_lut(crtc_state); @@ -914,6 +929,9 @@ static void icl_load_luts(const struct intel_crtc_state *crtc_state) bdw_load_lut_10(crtc, gamma_lut, PAL_PREC_INDEX_VALUE(0)); ivb_load_lut_ext_max(crtc); } + + intel_dsb_commit(dsb); + intel_dsb_put(dsb); } static u32 chv_cgm_degamma_ldw(const struct drm_color_lut *color) From dfaa6f285bd8eee63866df8b0474858e8a1cc40d Mon Sep 17 00:00:00 2001 From: Animesh Manna Date: Fri, 20 Sep 2019 17:29:29 +0530 Subject: [PATCH 219/331] drm/i915/dsb: Enable DSB for gen12. Enabling DSB by setting 1 to has_dsb flag for gen12. Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Shashank Sharma Reviewed-by: Shashank Sharma Signed-off-by: Animesh Manna Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190920115930.27829-10-animesh.manna@intel.com --- drivers/gpu/drm/i915/i915_pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index fe6941c8fc99d..c2faa679658c0 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -787,7 +787,8 @@ static const struct intel_device_info intel_elkhartlake_info = { [TRANSCODER_DSI_0] = TRANSCODER_DSI0_OFFSET, \ [TRANSCODER_DSI_1] = TRANSCODER_DSI1_OFFSET, \ }, \ - .has_global_mocs = 1 + .has_global_mocs = 1, \ + .display.has_dsb = 1 static const struct intel_device_info intel_tigerlake_12_info = { GEN12_FEATURES, From 5dd85e72bc922379fec10969c9390a72aaa4746f Mon Sep 17 00:00:00 2001 From: Animesh Manna Date: Fri, 20 Sep 2019 17:29:30 +0530 Subject: [PATCH 220/331] drm/i915/dsb: Documentation for DSB. Added docbook info regarding Display State Buffer(DSB) which is added from gen12 onwards to batch submit display HW programming. v1: Initial version as RFC. Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Shashank Sharma Reviewed-by: Shashank Sharma Signed-off-by: Animesh Manna Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190920115930.27829-11-animesh.manna@intel.com --- Documentation/gpu/i915.rst | 9 ++++ drivers/gpu/drm/i915/display/intel_dsb.c | 68 ++++++++++++++++++++++++ 2 files changed, 77 insertions(+) diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index e249ea7b0ec70..465779670fd4f 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -246,6 +246,15 @@ Display PLLs .. kernel-doc:: drivers/gpu/drm/i915/display/intel_dpll_mgr.h :internal: +Display State Buffer +-------------------- + +.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dsb.c + :doc: DSB + +.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dsb.c + :internal: + Memory Management and Command Submission ======================================== diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index f4c0b37683a51..0a0a1536ac96e 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -9,6 +9,23 @@ #define DSB_BUF_SIZE (2 * PAGE_SIZE) +/** + * DOC: DSB + * + * A DSB (Display State Buffer) is a queue of MMIO instructions in the memory + * which can be offloaded to DSB HW in Display Controller. DSB HW is a DMA + * engine that can be programmed to download the DSB from memory. + * It allows driver to batch submit display HW programming. This helps to + * reduce loading time and CPU activity, thereby making the context switch + * faster. DSB Support added from Gen12 Intel graphics based platform. + * + * DSB's can access only the pipe, plane, and transcoder Data Island Packet + * registers. + * + * DSB HW can support only register writes (both indexed and direct MMIO + * writes). There are no registers reads possible with DSB HW engine. + */ + /* DSB opcodes. */ #define DSB_OPCODE_SHIFT 24 #define DSB_OPCODE_MMIO_WRITE 0x1 @@ -66,6 +83,17 @@ static inline bool intel_dsb_disable_engine(struct intel_dsb *dsb) return true; } +/** + * intel_dsb_get() - Allocate DSB context and return a DSB instance. + * @crtc: intel_crtc structure to get pipe info. + * + * This function provides handle of a DSB instance, for the further DSB + * operations. + * + * Returns: address of Intel_dsb instance requested for. + * Failure: Returns the same DSB instance, but without a command buffer. + */ + struct intel_dsb * intel_dsb_get(struct intel_crtc *crtc) { @@ -116,6 +144,14 @@ intel_dsb_get(struct intel_crtc *crtc) return dsb; } +/** + * intel_dsb_put() - To destroy DSB context. + * @dsb: intel_dsb structure. + * + * This function destroys the DSB context allocated by a dsb_get(), by + * unpinning and releasing the VMA object associated with it. + */ + void intel_dsb_put(struct intel_dsb *dsb) { struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb); @@ -138,6 +174,19 @@ void intel_dsb_put(struct intel_dsb *dsb) } } +/** + * intel_dsb_indexed_reg_write() -Write to the DSB context for auto + * increment register. + * @dsb: intel_dsb structure. + * @reg: register address. + * @val: value. + * + * This function is used for writing register-value pair in command + * buffer of DSB for auto-increment register. During command buffer overflow, + * a warning is thrown and rest all erroneous condition register programming + * is done through mmio write. + */ + void intel_dsb_indexed_reg_write(struct intel_dsb *dsb, i915_reg_t reg, u32 val) { @@ -202,6 +251,18 @@ void intel_dsb_indexed_reg_write(struct intel_dsb *dsb, i915_reg_t reg, buf[dsb->free_pos] = 0; } +/** + * intel_dsb_reg_write() -Write to the DSB context for normal + * register. + * @dsb: intel_dsb structure. + * @reg: register address. + * @val: value. + * + * This function is used for writing register-value pair in command + * buffer of DSB. During command buffer overflow, a warning is thrown + * and rest all erroneous condition register programming is done + * through mmio write. + */ void intel_dsb_reg_write(struct intel_dsb *dsb, i915_reg_t reg, u32 val) { struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb); @@ -225,6 +286,13 @@ void intel_dsb_reg_write(struct intel_dsb *dsb, i915_reg_t reg, u32 val) i915_mmio_reg_offset(reg); } +/** + * intel_dsb_commit() - Trigger workload execution of DSB. + * @dsb: intel_dsb structure. + * + * This function is used to do actual write to hardware using DSB. + * On errors, fall back to MMIO. Also this function help to reset the context. + */ void intel_dsb_commit(struct intel_dsb *dsb) { struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb); From 87c1694533c947bf950251df3da04a32a05ede64 Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Fri, 20 Sep 2019 11:39:18 +0300 Subject: [PATCH 221/331] drm/i915: save AUD_FREQ_CNTRL state at audio domain suspend When audio power domain is suspended, the display driver must save state of AUD_FREQ_CNTRL on Tiger Lake and Ice Lake systems. The initial value of the register is set by BIOS and is read by driver during the audio component init sequence. Cc: Jani Nikula Cc: Imre Deak Signed-off-by: Kai Vehmanen Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190920083918.27057-1-kai.vehmanen@linux.intel.com --- drivers/gpu/drm/i915/display/intel_audio.c | 17 +++++++++++++++-- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_reg.h | 2 ++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c index aac089c79ceb6..54638d99e0217 100644 --- a/drivers/gpu/drm/i915/display/intel_audio.c +++ b/drivers/gpu/drm/i915/display/intel_audio.c @@ -852,10 +852,17 @@ static unsigned long i915_audio_component_get_power(struct device *kdev) ret = intel_display_power_get(dev_priv, POWER_DOMAIN_AUDIO); - /* Force CDCLK to 2*BCLK as long as we need audio to be powered. */ - if (dev_priv->audio_power_refcount++ == 0) + if (dev_priv->audio_power_refcount++ == 0) { + if (IS_TIGERLAKE(dev_priv) || IS_ICELAKE(dev_priv)) { + I915_WRITE(AUD_FREQ_CNTRL, dev_priv->audio_freq_cntrl); + DRM_DEBUG_KMS("restored AUD_FREQ_CNTRL to 0x%x\n", + dev_priv->audio_freq_cntrl); + } + + /* Force CDCLK to 2*BCLK as long as we need audio powered. */ if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) glk_force_audio_cdclk(dev_priv, true); + } return ret; } @@ -1116,6 +1123,12 @@ static void i915_audio_component_init(struct drm_i915_private *dev_priv) return; } + if (IS_TIGERLAKE(dev_priv) || IS_ICELAKE(dev_priv)) { + dev_priv->audio_freq_cntrl = I915_READ(AUD_FREQ_CNTRL); + DRM_DEBUG_KMS("init value of AUD_FREQ_CNTRL of 0x%x\n", + dev_priv->audio_freq_cntrl); + } + dev_priv->audio_component_registered = true; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 07f1e89a55ca0..fcf7423075ef1 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1541,6 +1541,7 @@ struct drm_i915_private { */ struct mutex av_mutex; int audio_power_refcount; + u32 audio_freq_cntrl; struct { struct mutex mutex; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 8649a30289631..6ecb64c042efd 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -9127,6 +9127,8 @@ enum { #define HSW_AUD_CHICKENBIT _MMIO(0x65f10) #define SKL_AUD_CODEC_WAKE_SIGNAL (1 << 15) +#define AUD_FREQ_CNTRL _MMIO(0x65900) + /* * HSW - ICL power wells * From 2d6f6f359fd841a6ce3133470859035037ff0f75 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 20 Sep 2019 21:54:16 +0300 Subject: [PATCH 222/331] drm/i915: add i915_driver_modeset_remove() For completeness, add counterpart to i915_driver_modeset_probe() and remove the asymmetry in the probe/remove parts. No functional changes. Reviewed-by: Chris Wilson Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190920185421.17822-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index c95986a984c81..acccf852e654b 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -447,6 +447,20 @@ static int i915_kick_out_firmware_fb(struct drm_i915_private *dev_priv) return ret; } +static void i915_driver_modeset_remove(struct drm_i915_private *i915) +{ + struct pci_dev *pdev = i915->drm.pdev; + + intel_modeset_driver_remove(&i915->drm); + + intel_bios_driver_remove(i915); + + vga_switcheroo_unregister_client(pdev); + vga_client_register(pdev, NULL, NULL, NULL); + + intel_csr_ucode_fini(i915); +} + static void intel_init_dpio(struct drm_i915_private *dev_priv) { /* @@ -1623,8 +1637,6 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) void i915_driver_remove(struct drm_i915_private *i915) { - struct pci_dev *pdev = i915->drm.pdev; - disable_rpm_wakeref_asserts(&i915->runtime_pm); i915_driver_unregister(i915); @@ -1645,14 +1657,7 @@ void i915_driver_remove(struct drm_i915_private *i915) intel_gvt_driver_remove(i915); - intel_modeset_driver_remove(&i915->drm); - - intel_bios_driver_remove(i915); - - vga_switcheroo_unregister_client(pdev); - vga_client_register(pdev, NULL, NULL, NULL); - - intel_csr_ucode_fini(i915); + i915_driver_modeset_remove(i915); /* Free error state after interrupts are fully disabled. */ cancel_delayed_work_sync(&i915->gt.hangcheck.work); From 5bcd53aa39f3dd054328e0a29a343e0254709908 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 20 Sep 2019 21:54:17 +0300 Subject: [PATCH 223/331] drm/i915: pass i915 to i915_driver_modeset_probe() In general, prefer struct drm_i915_private * over struct drm_device * when either will do. Rename the local variable to i915. No functional changes. Reviewed-by: Chris Wilson Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190920185421.17822-2-jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 59 ++++++++++++++++----------------- 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index acccf852e654b..fbf1b9e1e0598 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -329,23 +329,22 @@ static const struct vga_switcheroo_client_ops i915_switcheroo_ops = { .can_switch = i915_switcheroo_can_switch, }; -static int i915_driver_modeset_probe(struct drm_device *dev) +static int i915_driver_modeset_probe(struct drm_i915_private *i915) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct pci_dev *pdev = dev_priv->drm.pdev; + struct pci_dev *pdev = i915->drm.pdev; int ret; - if (i915_inject_probe_failure(dev_priv)) + if (i915_inject_probe_failure(i915)) return -ENODEV; - if (HAS_DISPLAY(dev_priv) && INTEL_DISPLAY_ENABLED(dev_priv)) { - ret = drm_vblank_init(&dev_priv->drm, - INTEL_NUM_PIPES(dev_priv)); + if (HAS_DISPLAY(i915) && INTEL_DISPLAY_ENABLED(i915)) { + ret = drm_vblank_init(&i915->drm, + INTEL_NUM_PIPES(i915)); if (ret) goto out; } - intel_bios_init(dev_priv); + intel_bios_init(i915); /* If we have > 1 VGA cards, then we need to arbitrate access * to the common VGA resources. @@ -354,7 +353,7 @@ static int i915_driver_modeset_probe(struct drm_device *dev) * then we do not take part in VGA arbitration and the * vga_client_register() fails with -ENODEV. */ - ret = vga_client_register(pdev, dev_priv, NULL, i915_vga_set_decode); + ret = vga_client_register(pdev, i915, NULL, i915_vga_set_decode); if (ret && ret != -ENODEV) goto out; @@ -365,56 +364,56 @@ static int i915_driver_modeset_probe(struct drm_device *dev) goto cleanup_vga_client; /* must happen before intel_power_domains_init_hw() on VLV/CHV */ - intel_update_rawclk(dev_priv); + intel_update_rawclk(i915); - intel_power_domains_init_hw(dev_priv, false); + intel_power_domains_init_hw(i915, false); - intel_csr_ucode_init(dev_priv); + intel_csr_ucode_init(i915); - ret = intel_irq_install(dev_priv); + ret = intel_irq_install(i915); if (ret) goto cleanup_csr; - intel_gmbus_setup(dev_priv); + intel_gmbus_setup(i915); /* Important: The output setup functions called by modeset_init need * working irqs for e.g. gmbus and dp aux transfers. */ - ret = intel_modeset_init(dev); + ret = intel_modeset_init(&i915->drm); if (ret) goto cleanup_irq; - ret = i915_gem_init(dev_priv); + ret = i915_gem_init(i915); if (ret) goto cleanup_modeset; - intel_overlay_setup(dev_priv); + intel_overlay_setup(i915); - if (!HAS_DISPLAY(dev_priv) || !INTEL_DISPLAY_ENABLED(dev_priv)) + if (!HAS_DISPLAY(i915) || !INTEL_DISPLAY_ENABLED(i915)) return 0; - ret = intel_fbdev_init(dev); + ret = intel_fbdev_init(&i915->drm); if (ret) goto cleanup_gem; /* Only enable hotplug handling once the fbdev is fully set up. */ - intel_hpd_init(dev_priv); + intel_hpd_init(i915); - intel_init_ipc(dev_priv); + intel_init_ipc(i915); return 0; cleanup_gem: - i915_gem_suspend(dev_priv); - i915_gem_driver_remove(dev_priv); - i915_gem_driver_release(dev_priv); + i915_gem_suspend(i915); + i915_gem_driver_remove(i915); + i915_gem_driver_release(i915); cleanup_modeset: - intel_modeset_driver_remove(dev); + intel_modeset_driver_remove(&i915->drm); cleanup_irq: - intel_irq_uninstall(dev_priv); - intel_gmbus_teardown(dev_priv); + intel_irq_uninstall(i915); + intel_gmbus_teardown(i915); cleanup_csr: - intel_csr_ucode_fini(dev_priv); - intel_power_domains_driver_remove(dev_priv); + intel_csr_ucode_fini(i915); + intel_power_domains_driver_remove(i915); vga_switcheroo_unregister_client(pdev); cleanup_vga_client: vga_client_register(pdev, NULL, NULL, NULL); @@ -1607,7 +1606,7 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret < 0) goto out_cleanup_mmio; - ret = i915_driver_modeset_probe(&dev_priv->drm); + ret = i915_driver_modeset_probe(dev_priv); if (ret < 0) goto out_cleanup_hw; From 9980c3c11060e177e6ccb0012371ade24a9543a4 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 20 Sep 2019 21:54:18 +0300 Subject: [PATCH 224/331] drm/i915: pass i915 to intel_modeset_driver_remove() In general, prefer struct drm_i915_private * over struct drm_device * when either will do. Rename the local variable to i915. Also propagate to intel_hpd_poll_fini(). No functional changes. Reviewed-by: Chris Wilson Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190920185421.17822-3-jani.nikula@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 38 ++++++++++---------- drivers/gpu/drm/i915/display/intel_display.h | 2 +- drivers/gpu/drm/i915/i915_drv.c | 4 +-- 3 files changed, 21 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index f5f655cde43a9..74da12d3911c6 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -17114,13 +17114,13 @@ void intel_display_resume(struct drm_device *dev) drm_atomic_state_put(state); } -static void intel_hpd_poll_fini(struct drm_device *dev) +static void intel_hpd_poll_fini(struct drm_i915_private *i915) { struct intel_connector *connector; struct drm_connector_list_iter conn_iter; /* Kill all the work that may have been queued by hpd. */ - drm_connector_list_iter_begin(dev, &conn_iter); + drm_connector_list_iter_begin(&i915->drm, &conn_iter); for_each_intel_connector_iter(connector, &conn_iter) { if (connector->modeset_retry_work.func) cancel_work_sync(&connector->modeset_retry_work); @@ -17132,51 +17132,49 @@ static void intel_hpd_poll_fini(struct drm_device *dev) drm_connector_list_iter_end(&conn_iter); } -void intel_modeset_driver_remove(struct drm_device *dev) +void intel_modeset_driver_remove(struct drm_i915_private *i915) { - struct drm_i915_private *dev_priv = to_i915(dev); - - flush_workqueue(dev_priv->flip_wq); - flush_workqueue(dev_priv->modeset_wq); + flush_workqueue(i915->flip_wq); + flush_workqueue(i915->modeset_wq); - flush_work(&dev_priv->atomic_helper.free_work); - WARN_ON(!llist_empty(&dev_priv->atomic_helper.free_list)); + flush_work(&i915->atomic_helper.free_work); + WARN_ON(!llist_empty(&i915->atomic_helper.free_list)); /* * Interrupts and polling as the first thing to avoid creating havoc. * Too much stuff here (turning of connectors, ...) would * experience fancy races otherwise. */ - intel_irq_uninstall(dev_priv); + intel_irq_uninstall(i915); /* * Due to the hpd irq storm handling the hotplug work can re-arm the * poll handlers. Hence disable polling after hpd handling is shut down. */ - intel_hpd_poll_fini(dev); + intel_hpd_poll_fini(i915); /* poll work can call into fbdev, hence clean that up afterwards */ - intel_fbdev_fini(dev_priv); + intel_fbdev_fini(i915); intel_unregister_dsm_handler(); - intel_fbc_global_disable(dev_priv); + intel_fbc_global_disable(i915); /* flush any delayed tasks or pending work */ flush_scheduled_work(); - intel_hdcp_component_fini(dev_priv); + intel_hdcp_component_fini(i915); - drm_mode_config_cleanup(dev); + drm_mode_config_cleanup(&i915->drm); - intel_overlay_cleanup(dev_priv); + intel_overlay_cleanup(i915); - intel_gmbus_teardown(dev_priv); + intel_gmbus_teardown(i915); - destroy_workqueue(dev_priv->flip_wq); - destroy_workqueue(dev_priv->modeset_wq); + destroy_workqueue(i915->flip_wq); + destroy_workqueue(i915->modeset_wq); - intel_fbc_cleanup_cfb(dev_priv); + intel_fbc_cleanup_cfb(i915); } /* diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index b1ae0e59c7153..933cbe36bb59f 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -578,7 +578,7 @@ void intel_display_print_error_state(struct drm_i915_error_state_buf *e, /* modesetting */ void intel_modeset_init_hw(struct drm_device *dev); int intel_modeset_init(struct drm_device *dev); -void intel_modeset_driver_remove(struct drm_device *dev); +void intel_modeset_driver_remove(struct drm_i915_private *i915); int intel_modeset_vga_set_state(struct drm_i915_private *dev_priv, bool state); void intel_display_resume(struct drm_device *dev); void i915_redisable_vga(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index fbf1b9e1e0598..9b1788b4589ed 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -407,7 +407,7 @@ static int i915_driver_modeset_probe(struct drm_i915_private *i915) i915_gem_driver_remove(i915); i915_gem_driver_release(i915); cleanup_modeset: - intel_modeset_driver_remove(&i915->drm); + intel_modeset_driver_remove(i915); cleanup_irq: intel_irq_uninstall(i915); intel_gmbus_teardown(i915); @@ -450,7 +450,7 @@ static void i915_driver_modeset_remove(struct drm_i915_private *i915) { struct pci_dev *pdev = i915->drm.pdev; - intel_modeset_driver_remove(&i915->drm); + intel_modeset_driver_remove(i915); intel_bios_driver_remove(i915); From 064bd628fde6665c38d8aa02ff7a0a9ad36db482 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 20 Sep 2019 21:54:19 +0300 Subject: [PATCH 225/331] drm/i915: abstract intel_panel_sanitize_ssc() from intel_modeset_init() The code is too specific and detailed to have open in a high level function. Abstract away. As a drive-by improvement switch to using enableddisabled() in logging and git rid of a redundant !!. No functional changes. v2: drop the !! while at it too (Chris) Reviewed-by: Chris Wilson Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190920185421.17822-4-jani.nikula@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 39 +++++++++++--------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 74da12d3911c6..9df7ee5a26bbc 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -7552,6 +7552,27 @@ intel_link_compute_m_n(u16 bits_per_pixel, int nlanes, constant_n); } +static void intel_panel_sanitize_ssc(struct drm_i915_private *dev_priv) +{ + /* + * There may be no VBT; and if the BIOS enabled SSC we can + * just keep using it to avoid unnecessary flicker. Whereas if the + * BIOS isn't using it, don't assume it will work even if the VBT + * indicates as much. + */ + if (HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv)) { + bool bios_lvds_use_ssc = I915_READ(PCH_DREF_CONTROL) & + DREF_SSC1_ENABLE; + + if (dev_priv->vbt.lvds_use_ssc != bios_lvds_use_ssc) { + DRM_DEBUG_KMS("SSC %s by BIOS, overriding VBT which says %s\n", + enableddisabled(bios_lvds_use_ssc), + enableddisabled(dev_priv->vbt.lvds_use_ssc)); + dev_priv->vbt.lvds_use_ssc = bios_lvds_use_ssc; + } + } +} + static inline bool intel_panel_use_ssc(struct drm_i915_private *dev_priv) { if (i915_modparams.panel_use_ssc >= 0) @@ -16203,23 +16224,7 @@ int intel_modeset_init(struct drm_device *dev) intel_init_pm(dev_priv); - /* - * There may be no VBT; and if the BIOS enabled SSC we can - * just keep using it to avoid unnecessary flicker. Whereas if the - * BIOS isn't using it, don't assume it will work even if the VBT - * indicates as much. - */ - if (HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv)) { - bool bios_lvds_use_ssc = !!(I915_READ(PCH_DREF_CONTROL) & - DREF_SSC1_ENABLE); - - if (dev_priv->vbt.lvds_use_ssc != bios_lvds_use_ssc) { - DRM_DEBUG_KMS("SSC %sabled by BIOS, overriding VBT which says %sabled\n", - bios_lvds_use_ssc ? "en" : "dis", - dev_priv->vbt.lvds_use_ssc ? "en" : "dis"); - dev_priv->vbt.lvds_use_ssc = bios_lvds_use_ssc; - } - } + intel_panel_sanitize_ssc(dev_priv); /* * Maximum framebuffer dimensions, chosen to match From e1a3d9895ddcec20cbb7353e77759589cf010275 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 20 Sep 2019 21:54:20 +0300 Subject: [PATCH 226/331] drm/i915: abstract intel_mode_config_init() from intel_modeset_init() The i915 specific mode config init code is too specific and detailed to have open in a high level function. Abstract away. No functional changes. v2: nest drm_mode_config_init() in the function too (Chris) Reviewed-by: Chris Wilson Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190920185421.17822-5-jani.nikula@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 87 +++++++++++--------- 1 file changed, 47 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 9df7ee5a26bbc..06946f9ffd27b 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -16187,6 +16187,52 @@ static int intel_initial_commit(struct drm_device *dev) return ret; } +static void intel_mode_config_init(struct drm_i915_private *i915) +{ + struct drm_mode_config *mode_config = &i915->drm.mode_config; + + drm_mode_config_init(&i915->drm); + + mode_config->min_width = 0; + mode_config->min_height = 0; + + mode_config->preferred_depth = 24; + mode_config->prefer_shadow = 1; + + mode_config->allow_fb_modifiers = true; + + mode_config->funcs = &intel_mode_funcs; + + /* + * Maximum framebuffer dimensions, chosen to match + * the maximum render engine surface size on gen4+. + */ + if (INTEL_GEN(i915) >= 7) { + mode_config->max_width = 16384; + mode_config->max_height = 16384; + } else if (INTEL_GEN(i915) >= 4) { + mode_config->max_width = 8192; + mode_config->max_height = 8192; + } else if (IS_GEN(i915, 3)) { + mode_config->max_width = 4096; + mode_config->max_height = 4096; + } else { + mode_config->max_width = 2048; + mode_config->max_height = 2048; + } + + if (IS_I845G(i915) || IS_I865G(i915)) { + mode_config->cursor_width = IS_I845G(i915) ? 64 : 512; + mode_config->cursor_height = 1023; + } else if (IS_GEN(i915, 2)) { + mode_config->cursor_width = 64; + mode_config->cursor_height = 64; + } else { + mode_config->cursor_width = 256; + mode_config->cursor_height = 256; + } +} + int intel_modeset_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -16198,22 +16244,12 @@ int intel_modeset_init(struct drm_device *dev) dev_priv->flip_wq = alloc_workqueue("i915_flip", WQ_HIGHPRI | WQ_UNBOUND, WQ_UNBOUND_MAX_ACTIVE); - drm_mode_config_init(dev); + intel_mode_config_init(dev_priv); ret = intel_bw_init(dev_priv); if (ret) return ret; - dev->mode_config.min_width = 0; - dev->mode_config.min_height = 0; - - dev->mode_config.preferred_depth = 24; - dev->mode_config.prefer_shadow = 1; - - dev->mode_config.allow_fb_modifiers = true; - - dev->mode_config.funcs = &intel_mode_funcs; - init_llist_head(&dev_priv->atomic_helper.free_list); INIT_WORK(&dev_priv->atomic_helper.free_work, intel_atomic_helper_free_state_worker); @@ -16226,35 +16262,6 @@ int intel_modeset_init(struct drm_device *dev) intel_panel_sanitize_ssc(dev_priv); - /* - * Maximum framebuffer dimensions, chosen to match - * the maximum render engine surface size on gen4+. - */ - if (INTEL_GEN(dev_priv) >= 7) { - dev->mode_config.max_width = 16384; - dev->mode_config.max_height = 16384; - } else if (INTEL_GEN(dev_priv) >= 4) { - dev->mode_config.max_width = 8192; - dev->mode_config.max_height = 8192; - } else if (IS_GEN(dev_priv, 3)) { - dev->mode_config.max_width = 4096; - dev->mode_config.max_height = 4096; - } else { - dev->mode_config.max_width = 2048; - dev->mode_config.max_height = 2048; - } - - if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) { - dev->mode_config.cursor_width = IS_I845G(dev_priv) ? 64 : 512; - dev->mode_config.cursor_height = 1023; - } else if (IS_GEN(dev_priv, 2)) { - dev->mode_config.cursor_width = 64; - dev->mode_config.cursor_height = 64; - } else { - dev->mode_config.cursor_width = 256; - dev->mode_config.cursor_height = 256; - } - DRM_DEBUG_KMS("%d display pipe%s available.\n", INTEL_NUM_PIPES(dev_priv), INTEL_NUM_PIPES(dev_priv) > 1 ? "s" : ""); From 6cd02e77757a8fb0089053045932455355d2c4fd Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 20 Sep 2019 21:54:21 +0300 Subject: [PATCH 227/331] drm/i915: pass i915 to intel_modeset_init() and intel_modeset_init_hw() In general, prefer struct drm_i915_private * over struct drm_device * when either will do. Rename the local variables to i915. No functional changes. Reviewed-by: Chris Wilson Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190920185421.17822-6-jani.nikula@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 69 ++++++++++---------- drivers/gpu/drm/i915/display/intel_display.h | 4 +- drivers/gpu/drm/i915/i915_drv.c | 4 +- 3 files changed, 37 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 06946f9ffd27b..5ecf542701811 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -4360,7 +4360,7 @@ void intel_finish_reset(struct drm_i915_private *dev_priv) * so need a full re-initialization. */ intel_pps_unlock_regs_wa(dev_priv); - intel_modeset_init_hw(dev); + intel_modeset_init_hw(dev_priv); intel_init_clock_gating(dev_priv); spin_lock_irq(&dev_priv->irq_lock); @@ -16021,13 +16021,11 @@ static void i915_disable_vga(struct drm_i915_private *dev_priv) POSTING_READ(vga_reg); } -void intel_modeset_init_hw(struct drm_device *dev) +void intel_modeset_init_hw(struct drm_i915_private *i915) { - struct drm_i915_private *dev_priv = to_i915(dev); - - intel_update_cdclk(dev_priv); - intel_dump_cdclk_state(&dev_priv->cdclk.hw, "Current CDCLK"); - dev_priv->cdclk.logical = dev_priv->cdclk.actual = dev_priv->cdclk.hw; + intel_update_cdclk(i915); + intel_dump_cdclk_state(&i915->cdclk.hw, "Current CDCLK"); + i915->cdclk.logical = i915->cdclk.actual = i915->cdclk.hw; } /* @@ -16233,42 +16231,42 @@ static void intel_mode_config_init(struct drm_i915_private *i915) } } -int intel_modeset_init(struct drm_device *dev) +int intel_modeset_init(struct drm_i915_private *i915) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_device *dev = &i915->drm; enum pipe pipe; struct intel_crtc *crtc; int ret; - dev_priv->modeset_wq = alloc_ordered_workqueue("i915_modeset", 0); - dev_priv->flip_wq = alloc_workqueue("i915_flip", WQ_HIGHPRI | - WQ_UNBOUND, WQ_UNBOUND_MAX_ACTIVE); + i915->modeset_wq = alloc_ordered_workqueue("i915_modeset", 0); + i915->flip_wq = alloc_workqueue("i915_flip", WQ_HIGHPRI | + WQ_UNBOUND, WQ_UNBOUND_MAX_ACTIVE); - intel_mode_config_init(dev_priv); + intel_mode_config_init(i915); - ret = intel_bw_init(dev_priv); + ret = intel_bw_init(i915); if (ret) return ret; - init_llist_head(&dev_priv->atomic_helper.free_list); - INIT_WORK(&dev_priv->atomic_helper.free_work, + init_llist_head(&i915->atomic_helper.free_list); + INIT_WORK(&i915->atomic_helper.free_work, intel_atomic_helper_free_state_worker); - intel_init_quirks(dev_priv); + intel_init_quirks(i915); - intel_fbc_init(dev_priv); + intel_fbc_init(i915); - intel_init_pm(dev_priv); + intel_init_pm(i915); - intel_panel_sanitize_ssc(dev_priv); + intel_panel_sanitize_ssc(i915); DRM_DEBUG_KMS("%d display pipe%s available.\n", - INTEL_NUM_PIPES(dev_priv), - INTEL_NUM_PIPES(dev_priv) > 1 ? "s" : ""); + INTEL_NUM_PIPES(i915), + INTEL_NUM_PIPES(i915) > 1 ? "s" : ""); - if (HAS_DISPLAY(dev_priv) && INTEL_DISPLAY_ENABLED(dev_priv)) { - for_each_pipe(dev_priv, pipe) { - ret = intel_crtc_init(dev_priv, pipe); + if (HAS_DISPLAY(i915) && INTEL_DISPLAY_ENABLED(i915)) { + for_each_pipe(i915, pipe) { + ret = intel_crtc_init(i915, pipe); if (ret) { drm_mode_config_cleanup(dev); return ret; @@ -16277,19 +16275,19 @@ int intel_modeset_init(struct drm_device *dev) } intel_shared_dpll_init(dev); - intel_update_fdi_pll_freq(dev_priv); + intel_update_fdi_pll_freq(i915); - intel_update_czclk(dev_priv); - intel_modeset_init_hw(dev); + intel_update_czclk(i915); + intel_modeset_init_hw(i915); - intel_hdcp_component_init(dev_priv); + intel_hdcp_component_init(i915); - if (dev_priv->max_cdclk_freq == 0) - intel_update_max_cdclk(dev_priv); + if (i915->max_cdclk_freq == 0) + intel_update_max_cdclk(i915); /* Just disable it once at startup */ - i915_disable_vga(dev_priv); - intel_setup_outputs(dev_priv); + i915_disable_vga(i915); + intel_setup_outputs(i915); drm_modeset_lock_all(dev); intel_modeset_setup_hw_state(dev, dev->mode_config.acquire_ctx); @@ -16308,8 +16306,7 @@ int intel_modeset_init(struct drm_device *dev) * can even allow for smooth boot transitions if the BIOS * fb is large enough for the active pipe configuration. */ - dev_priv->display.get_initial_plane_config(crtc, - &plane_config); + i915->display.get_initial_plane_config(crtc, &plane_config); /* * If the fb is shared between multiple heads, we'll @@ -16323,7 +16320,7 @@ int intel_modeset_init(struct drm_device *dev) * Note that we need to do this after reconstructing the BIOS fb's * since the watermark calculation done here will use pstate->fb. */ - if (!HAS_GMCH(dev_priv)) + if (!HAS_GMCH(i915)) sanitize_watermarks(dev); /* diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index 933cbe36bb59f..5cea6f8e107ae 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -576,8 +576,8 @@ void intel_display_print_error_state(struct drm_i915_error_state_buf *e, struct intel_display_error_state *error); /* modesetting */ -void intel_modeset_init_hw(struct drm_device *dev); -int intel_modeset_init(struct drm_device *dev); +void intel_modeset_init_hw(struct drm_i915_private *i915); +int intel_modeset_init(struct drm_i915_private *i915); void intel_modeset_driver_remove(struct drm_i915_private *i915); int intel_modeset_vga_set_state(struct drm_i915_private *dev_priv, bool state); void intel_display_resume(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 9b1788b4589ed..dd9613e457239 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -378,7 +378,7 @@ static int i915_driver_modeset_probe(struct drm_i915_private *i915) /* Important: The output setup functions called by modeset_init need * working irqs for e.g. gmbus and dp aux transfers. */ - ret = intel_modeset_init(&i915->drm); + ret = intel_modeset_init(i915); if (ret) goto cleanup_irq; @@ -1946,7 +1946,7 @@ static int i915_drm_resume(struct drm_device *dev) i915_gem_resume(dev_priv); - intel_modeset_init_hw(dev); + intel_modeset_init_hw(dev_priv); intel_init_clock_gating(dev_priv); spin_lock_irq(&dev_priv->irq_lock); From ae911b23d2f06c5d0a3e32768bedea857cadd269 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Sep 2019 12:00:53 +0100 Subject: [PATCH 228/331] drm/i915/execlists: Relax assertion for a pinned context image on reset A gpu hang can occur at any time, given a sufficiently angry gpu. An example is when it forgets to perform a context-switch at the end of a request, leaving us with a hanging GPU on a completed request. Here, we may retire the request, only leaving its context alive via the active barrier. When we reset the GPU on a completed request, we do not modify its context image (just updating the ring state) and can safely defer the assertion that we have the image pinned and ready to modify. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111639 Fixes: dffa8feb3084 ("drm/i915/perf: Assert locking for i915_init_oa_perf_state()") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190923110056.15176-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 1a2b71157f08d..80ded99bcb157 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -2383,7 +2383,6 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); /* Proclaim we have exclusive access to the context image! */ - GEM_BUG_ON(!intel_context_is_pinned(ce)); mutex_acquire(&ce->pin_mutex.dep_map, 2, 0, _THIS_IP_); rq = active_request(rq); @@ -2432,6 +2431,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) * future request will be after userspace has had the opportunity * to recreate its own state. */ + GEM_BUG_ON(!intel_context_is_pinned(ce)); regs = ce->lrc_reg_state; if (engine->pinned_default_state) { memcpy(regs, /* skip restoring the vanilla PPHWSP */ From 3231f8c01121ee1febfd82398ee22f7ff9dc5d76 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Sep 2019 12:00:54 +0100 Subject: [PATCH 229/331] drm/i915/execlists: Drop redundant list_del_init(&rq->sched.link) Since amalgamating the queued and active lists in commit 422d7df4f090 ("drm/i915: Replace engine->timeline with a plain list"), performing a i915_request_submit() will remove the request from the execlists priority queue. References: 422d7df4f090 ("drm/i915: Replace engine->timeline with a plain list") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190923110056.15176-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 80ded99bcb157..0a4812ebd184e 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -2514,7 +2514,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) int i; priolist_for_each_request_consume(rq, rn, p, i) { - list_del_init(&rq->sched.link); __i915_request_submit(rq); dma_fence_set_error(&rq->fence, -EIO); i915_request_mark_complete(rq); From c0bb487dc19fc45dbeede7dcf8f513df51a3cd33 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Sep 2019 12:00:55 +0100 Subject: [PATCH 230/331] drm/i915: Only enqueue already completed requests If we are asked to submit a completed request, just move it onto the active-list without modifying it's payload. If we try to emit the modified payload of a completed request, we risk racing with the ring->head update during retirement which may advance the head past our breadcrumb and so we generate a warning for the emission being behind the RING_HEAD. v2: Commentary for the sneaky, shared responsibility between functions. v3: Spelling mistakes and bonus assertion Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190923110056.15176-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 66 ++++++++++++++++++----------- drivers/gpu/drm/i915/i915_request.c | 44 +++++++++++++------ drivers/gpu/drm/i915/i915_request.h | 2 +- 3 files changed, 74 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 0a4812ebd184e..bc9badd9c046d 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -799,6 +799,17 @@ static bool can_merge_rq(const struct i915_request *prev, GEM_BUG_ON(prev == next); GEM_BUG_ON(!assert_priority_queue(prev, next)); + /* + * We do not submit known completed requests. Therefore if the next + * request is already completed, we can pretend to merge it in + * with the previous context (and we will skip updating the ELSP + * and tracking). Thus hopefully keeping the ELSP full with active + * contexts, despite the best efforts of preempt-to-busy to confuse + * us. + */ + if (i915_request_completed(next)) + return true; + if (!can_merge_ctx(prev->hw_context, next->hw_context)) return false; @@ -1181,21 +1192,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) continue; } - if (i915_request_completed(rq)) { - ve->request = NULL; - ve->base.execlists.queue_priority_hint = INT_MIN; - rb_erase_cached(rb, &execlists->virtual); - RB_CLEAR_NODE(rb); - - rq->engine = engine; - __i915_request_submit(rq); - - spin_unlock(&ve->base.active.lock); - - rb = rb_first_cached(&execlists->virtual); - continue; - } - if (last && !can_merge_rq(last, rq)) { spin_unlock(&ve->base.active.lock); return; /* leave this for another */ @@ -1249,11 +1245,23 @@ static void execlists_dequeue(struct intel_engine_cs *engine) GEM_BUG_ON(ve->siblings[0] != engine); } - __i915_request_submit(rq); - if (!i915_request_completed(rq)) { + if (__i915_request_submit(rq)) { submit = true; last = rq; } + + /* + * Hmm, we have a bunch of virtual engine requests, + * but the first one was already completed (thanks + * preempt-to-busy!). Keep looking at the veng queue + * until we have no more relevant requests (i.e. + * the normal submit queue has higher priority). + */ + if (!submit) { + spin_unlock(&ve->base.active.lock); + rb = rb_first_cached(&execlists->virtual); + continue; + } } spin_unlock(&ve->base.active.lock); @@ -1266,8 +1274,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) int i; priolist_for_each_request_consume(rq, rn, p, i) { - if (i915_request_completed(rq)) - goto skip; + bool merge = true; /* * Can we combine this request with the current port? @@ -1308,14 +1315,23 @@ static void execlists_dequeue(struct intel_engine_cs *engine) ctx_single_port_submission(rq->hw_context)) goto done; - *port = execlists_schedule_in(last, port - execlists->pending); - port++; + merge = false; } - last = rq; - submit = true; -skip: - __i915_request_submit(rq); + if (__i915_request_submit(rq)) { + if (!merge) { + *port = execlists_schedule_in(last, port - execlists->pending); + port++; + last = NULL; + } + + GEM_BUG_ON(last && + !can_merge_ctx(last->hw_context, + rq->hw_context)); + + submit = true; + last = rq; + } } rb_erase_cached(&p->node, &execlists->queue); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 9bd8538b19075..a8916412759b3 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -377,9 +377,10 @@ __i915_request_await_execution(struct i915_request *rq, return 0; } -void __i915_request_submit(struct i915_request *request) +bool __i915_request_submit(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; + bool result = false; GEM_TRACE("%s fence %llx:%lld, current %d\n", engine->name, @@ -389,6 +390,25 @@ void __i915_request_submit(struct i915_request *request) GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&engine->active.lock); + /* + * With the advent of preempt-to-busy, we frequently encounter + * requests that we have unsubmitted from HW, but left running + * until the next ack and so have completed in the meantime. On + * resubmission of that completed request, we can skip + * updating the payload, and execlists can even skip submitting + * the request. + * + * We must remove the request from the caller's priority queue, + * and the caller must only call us when the request is in their + * priority queue, under the active.lock. This ensures that the + * request has *not* yet been retired and we can safely move + * the request into the engine->active.list where it will be + * dropped upon retiring. (Otherwise if resubmit a *retired* + * request, this would be a horrible use-after-free.) + */ + if (i915_request_completed(request)) + goto xfer; + if (i915_gem_context_is_banned(request->gem_context)) i915_request_skip(request, -EIO); @@ -412,13 +432,18 @@ void __i915_request_submit(struct i915_request *request) i915_sw_fence_signaled(&request->semaphore)) engine->saturated |= request->sched.semaphores; - /* We may be recursing from the signal callback of another i915 fence */ - spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); + engine->emit_fini_breadcrumb(request, + request->ring->vaddr + request->postfix); + + trace_i915_request_execute(request); + engine->serial++; + result = true; - list_move_tail(&request->sched.link, &engine->active.requests); +xfer: /* We may be recursing from the signal callback of another i915 fence */ + spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); - GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); - set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); + if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)) + list_move_tail(&request->sched.link, &engine->active.requests); if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) && @@ -429,12 +454,7 @@ void __i915_request_submit(struct i915_request *request) spin_unlock(&request->lock); - engine->emit_fini_breadcrumb(request, - request->ring->vaddr + request->postfix); - - engine->serial++; - - trace_i915_request_execute(request); + return result; } void i915_request_submit(struct i915_request *request) diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index b18f49528dedd..ec5bb4c2e5aeb 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -292,7 +292,7 @@ int i915_request_await_execution(struct i915_request *rq, void i915_request_add(struct i915_request *rq); -void __i915_request_submit(struct i915_request *request); +bool __i915_request_submit(struct i915_request *request); void i915_request_submit(struct i915_request *request); void i915_request_skip(struct i915_request *request, int error); From 0d7cf7bc15e75bf79f2f65d61d19f896609f816a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Sep 2019 12:00:56 +0100 Subject: [PATCH 231/331] drm/i915/execlists: Refactor -EIO markup of hung requests Pull setting -EIO on the hung requests into its own utility function. Having allowed ourselves to short-circuit submission of completed requests, we can now do the mark_eio() prior to submission and avoid some redundant operations. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190923110056.15176-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 31 ++++++++++++++++------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index bc9badd9c046d..bff4c6a735cfc 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -234,6 +234,13 @@ static void execlists_init_reg_state(u32 *reg_state, struct intel_engine_cs *engine, struct intel_ring *ring); +static void mark_eio(struct i915_request *rq) +{ + if (!i915_request_signaled(rq)) + dma_fence_set_error(&rq->fence, -EIO); + i915_request_mark_complete(rq); +} + static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine) { return (i915_ggtt_offset(engine->status_page.vma) + @@ -2517,12 +2524,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) __execlists_reset(engine, true); /* Mark all executing requests as skipped. */ - list_for_each_entry(rq, &engine->active.requests, sched.link) { - if (!i915_request_signaled(rq)) - dma_fence_set_error(&rq->fence, -EIO); - - i915_request_mark_complete(rq); - } + list_for_each_entry(rq, &engine->active.requests, sched.link) + mark_eio(rq); /* Flush the queued requests to the timeline list (for retiring). */ while ((rb = rb_first_cached(&execlists->queue))) { @@ -2530,9 +2533,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) int i; priolist_for_each_request_consume(rq, rn, p, i) { + mark_eio(rq); __i915_request_submit(rq); - dma_fence_set_error(&rq->fence, -EIO); - i915_request_mark_complete(rq); } rb_erase_cached(&p->node, &execlists->queue); @@ -2548,13 +2550,14 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) RB_CLEAR_NODE(rb); spin_lock(&ve->base.active.lock); - if (ve->request) { - ve->request->engine = engine; - __i915_request_submit(ve->request); - dma_fence_set_error(&ve->request->fence, -EIO); - i915_request_mark_complete(ve->request); + rq = fetch_and_zero(&ve->request); + if (rq) { + mark_eio(rq); + + rq->engine = engine; + __i915_request_submit(rq); + ve->base.execlists.queue_priority_hint = INT_MIN; - ve->request = NULL; } spin_unlock(&ve->base.active.lock); } From 6171e58b1ff5c41632d8fb87f0e6bd003ed34c13 Mon Sep 17 00:00:00 2001 From: Clinton A Taylor Date: Fri, 20 Sep 2019 13:58:05 -0700 Subject: [PATCH 232/331] drm/i915/tgl: Add missing ddi clock select during DP init sequence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Step 4.b was complete missed because it is only required to TC and TBT. Bspec: 49190 Reviewed-by: Imre Deak Reviewed-by: Lucas De Marchi Signed-off-by: Clinton A Taylor Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20190920205810.211048-2-jose.souza@intel.com --- drivers/gpu/drm/i915/display/intel_ddi.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 0c0da9f6c2e88..dfd6b064cbc3d 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -3230,11 +3230,14 @@ static void tgl_ddi_pre_enable_dp(struct intel_encoder *encoder, intel_edp_panel_on(intel_dp); /* - * 1.b, 3. and 4. is done before tgl_ddi_pre_enable_dp() by: + * 1.b, 3. and 4.a is done before tgl_ddi_pre_enable_dp() by: * haswell_crtc_enable()->intel_encoders_pre_pll_enable() and * haswell_crtc_enable()->intel_enable_shared_dpll() */ + /* 4.b */ + intel_ddi_clk_select(encoder, crtc_state); + /* 5. */ if (!intel_phy_is_tc(dev_priv, phy) || dig_port->tc_mode != TC_PORT_TBT_ALT) From 31d9ae9d734226b2ce3a1bc2ce1ed29a6997d556 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Fri, 20 Sep 2019 13:58:06 -0700 Subject: [PATCH 233/331] drm/i915/tgl: Finish modular FIA support on registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If platform supports and has modular FIA is enabled, the registers bits also change, example: reading TC3 registers with modular FIA enabled, driver should read from FIA2 but with TC1 bits offsets. It is described in BSpec 50231 for DFLEXDPSP, other registers don't have the BSpec description but testing in real hardware have proven that it had moved for all other registers too. v2: - Caching index in tc_phy_fia_idx, instead of calculate it each time v3: - Setting tc_phy_fia and tc_phy_fia_idx in the same function Cc: Lucas De Marchi Reviewed-by: Lucas De Marchi Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20190920205810.211048-3-jose.souza@intel.com --- .../drm/i915/display/intel_display_types.h | 1 + drivers/gpu/drm/i915/display/intel_tc.c | 72 ++++++++++--------- drivers/gpu/drm/i915/i915_reg.h | 28 ++++---- 3 files changed, 53 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 49c902b00484c..6b0a646f0170a 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -1288,6 +1288,7 @@ struct intel_digital_port { char tc_port_name[8]; enum tc_port_mode tc_mode; enum phy_fia tc_phy_fia; + u8 tc_phy_fia_idx; void (*write_infoframe)(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index 85743a43bee20..f923f9cbd33c3 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -23,32 +23,38 @@ static const char *tc_port_mode_name(enum tc_port_mode mode) return names[mode]; } -static bool has_modular_fia(struct drm_i915_private *i915) -{ - if (!INTEL_INFO(i915)->display.has_modular_fia) - return false; - - return intel_uncore_read(&i915->uncore, - PORT_TX_DFLEXDPSP(FIA1)) & MODULAR_FIA_MASK; -} - -static enum phy_fia tc_port_to_fia(struct drm_i915_private *i915, - enum tc_port tc_port) +static void +tc_port_load_fia_params(struct drm_i915_private *i915, + struct intel_digital_port *dig_port) { - if (!has_modular_fia(i915)) - return FIA1; + enum port port = dig_port->base.port; + enum tc_port tc_port = intel_port_to_tc(i915, port); + u32 modular_fia; + + if (INTEL_INFO(i915)->display.has_modular_fia) { + modular_fia = intel_uncore_read(&i915->uncore, + PORT_TX_DFLEXDPSP(FIA1)); + modular_fia &= MODULAR_FIA_MASK; + } else { + modular_fia = 0; + } /* * Each Modular FIA instance houses 2 TC ports. In SOC that has more * than two TC ports, there are multiple instances of Modular FIA. */ - return tc_port / 2; + if (modular_fia) { + dig_port->tc_phy_fia = tc_port / 2; + dig_port->tc_phy_fia_idx = tc_port % 2; + } else { + dig_port->tc_phy_fia = FIA1; + dig_port->tc_phy_fia_idx = tc_port; + } } u32 intel_tc_port_get_lane_mask(struct intel_digital_port *dig_port) { struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - enum tc_port tc_port = intel_port_to_tc(i915, dig_port->base.port); struct intel_uncore *uncore = &i915->uncore; u32 lane_mask; @@ -57,8 +63,8 @@ u32 intel_tc_port_get_lane_mask(struct intel_digital_port *dig_port) WARN_ON(lane_mask == 0xffffffff); - return (lane_mask & DP_LANE_ASSIGNMENT_MASK(tc_port)) >> - DP_LANE_ASSIGNMENT_SHIFT(tc_port); + lane_mask &= DP_LANE_ASSIGNMENT_MASK(dig_port->tc_phy_fia_idx); + return lane_mask >> DP_LANE_ASSIGNMENT_SHIFT(dig_port->tc_phy_fia_idx); } int intel_tc_port_fia_max_lane_count(struct intel_digital_port *dig_port) @@ -95,7 +101,6 @@ void intel_tc_port_set_fia_lane_count(struct intel_digital_port *dig_port, int required_lanes) { struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - enum tc_port tc_port = intel_port_to_tc(i915, dig_port->base.port); bool lane_reversal = dig_port->saved_port_bits & DDI_BUF_PORT_REVERSAL; struct intel_uncore *uncore = &i915->uncore; u32 val; @@ -104,19 +109,21 @@ void intel_tc_port_set_fia_lane_count(struct intel_digital_port *dig_port, val = intel_uncore_read(uncore, PORT_TX_DFLEXDPMLE1(dig_port->tc_phy_fia)); - val &= ~DFLEXDPMLE1_DPMLETC_MASK(tc_port); + val &= ~DFLEXDPMLE1_DPMLETC_MASK(dig_port->tc_phy_fia_idx); switch (required_lanes) { case 1: - val |= lane_reversal ? DFLEXDPMLE1_DPMLETC_ML3(tc_port) : - DFLEXDPMLE1_DPMLETC_ML0(tc_port); + val |= lane_reversal ? + DFLEXDPMLE1_DPMLETC_ML3(dig_port->tc_phy_fia_idx) : + DFLEXDPMLE1_DPMLETC_ML0(dig_port->tc_phy_fia_idx); break; case 2: - val |= lane_reversal ? DFLEXDPMLE1_DPMLETC_ML3_2(tc_port) : - DFLEXDPMLE1_DPMLETC_ML1_0(tc_port); + val |= lane_reversal ? + DFLEXDPMLE1_DPMLETC_ML3_2(dig_port->tc_phy_fia_idx) : + DFLEXDPMLE1_DPMLETC_ML1_0(dig_port->tc_phy_fia_idx); break; case 4: - val |= DFLEXDPMLE1_DPMLETC_ML3_0(tc_port); + val |= DFLEXDPMLE1_DPMLETC_ML3_0(dig_port->tc_phy_fia_idx); break; default: MISSING_CASE(required_lanes); @@ -164,9 +171,9 @@ static u32 tc_port_live_status_mask(struct intel_digital_port *dig_port) return mask; } - if (val & TC_LIVE_STATE_TBT(tc_port)) + if (val & TC_LIVE_STATE_TBT(dig_port->tc_phy_fia_idx)) mask |= BIT(TC_PORT_TBT_ALT); - if (val & TC_LIVE_STATE_TC(tc_port)) + if (val & TC_LIVE_STATE_TC(dig_port->tc_phy_fia_idx)) mask |= BIT(TC_PORT_DP_ALT); if (intel_uncore_read(uncore, SDEISR) & SDE_TC_HOTPLUG_ICP(tc_port)) @@ -182,7 +189,6 @@ static u32 tc_port_live_status_mask(struct intel_digital_port *dig_port) static bool icl_tc_phy_status_complete(struct intel_digital_port *dig_port) { struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - enum tc_port tc_port = intel_port_to_tc(i915, dig_port->base.port); struct intel_uncore *uncore = &i915->uncore; u32 val; @@ -194,14 +200,13 @@ static bool icl_tc_phy_status_complete(struct intel_digital_port *dig_port) return false; } - return val & DP_PHY_MODE_STATUS_COMPLETED(tc_port); + return val & DP_PHY_MODE_STATUS_COMPLETED(dig_port->tc_phy_fia_idx); } static bool icl_tc_phy_set_safe_mode(struct intel_digital_port *dig_port, bool enable) { struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - enum tc_port tc_port = intel_port_to_tc(i915, dig_port->base.port); struct intel_uncore *uncore = &i915->uncore; u32 val; @@ -215,9 +220,9 @@ static bool icl_tc_phy_set_safe_mode(struct intel_digital_port *dig_port, return false; } - val &= ~DP_PHY_MODE_STATUS_NOT_SAFE(tc_port); + val &= ~DP_PHY_MODE_STATUS_NOT_SAFE(dig_port->tc_phy_fia_idx); if (!enable) - val |= DP_PHY_MODE_STATUS_NOT_SAFE(tc_port); + val |= DP_PHY_MODE_STATUS_NOT_SAFE(dig_port->tc_phy_fia_idx); intel_uncore_write(uncore, PORT_TX_DFLEXDPCSSS(dig_port->tc_phy_fia), val); @@ -232,7 +237,6 @@ static bool icl_tc_phy_set_safe_mode(struct intel_digital_port *dig_port, static bool icl_tc_phy_is_in_safe_mode(struct intel_digital_port *dig_port) { struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - enum tc_port tc_port = intel_port_to_tc(i915, dig_port->base.port); struct intel_uncore *uncore = &i915->uncore; u32 val; @@ -244,7 +248,7 @@ static bool icl_tc_phy_is_in_safe_mode(struct intel_digital_port *dig_port) return true; } - return !(val & DP_PHY_MODE_STATUS_NOT_SAFE(tc_port)); + return !(val & DP_PHY_MODE_STATUS_NOT_SAFE(dig_port->tc_phy_fia_idx)); } /* @@ -540,5 +544,5 @@ void intel_tc_port_init(struct intel_digital_port *dig_port, bool is_legacy) mutex_init(&dig_port->tc_lock); dig_port->tc_legacy_port = is_legacy; dig_port->tc_link_refcount = 0; - dig_port->tc_phy_fia = tc_port_to_fia(i915, tc_port); + tc_port_load_fia_params(i915, dig_port); } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 6ecb64c042efd..4236cb5d12df4 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2166,13 +2166,13 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define _MMIO_FIA(fia, off) _MMIO(_FIA(fia) + (off)) /* ICL PHY DFLEX registers */ -#define PORT_TX_DFLEXDPMLE1(fia) _MMIO_FIA((fia), 0x008C0) -#define DFLEXDPMLE1_DPMLETC_MASK(tc_port) (0xf << (4 * (tc_port))) -#define DFLEXDPMLE1_DPMLETC_ML0(tc_port) (1 << (4 * (tc_port))) -#define DFLEXDPMLE1_DPMLETC_ML1_0(tc_port) (3 << (4 * (tc_port))) -#define DFLEXDPMLE1_DPMLETC_ML3(tc_port) (8 << (4 * (tc_port))) -#define DFLEXDPMLE1_DPMLETC_ML3_2(tc_port) (12 << (4 * (tc_port))) -#define DFLEXDPMLE1_DPMLETC_ML3_0(tc_port) (15 << (4 * (tc_port))) +#define PORT_TX_DFLEXDPMLE1(fia) _MMIO_FIA((fia), 0x008C0) +#define DFLEXDPMLE1_DPMLETC_MASK(idx) (0xf << (4 * (idx))) +#define DFLEXDPMLE1_DPMLETC_ML0(idx) (1 << (4 * (idx))) +#define DFLEXDPMLE1_DPMLETC_ML1_0(idx) (3 << (4 * (idx))) +#define DFLEXDPMLE1_DPMLETC_ML3(idx) (8 << (4 * (idx))) +#define DFLEXDPMLE1_DPMLETC_ML3_2(idx) (12 << (4 * (idx))) +#define DFLEXDPMLE1_DPMLETC_ML3_0(idx) (15 << (4 * (idx))) /* BXT PHY Ref registers */ #define _PORT_REF_DW3_A 0x16218C @@ -11688,17 +11688,17 @@ enum skl_power_gate { #define PORT_TX_DFLEXDPSP(fia) _MMIO_FIA((fia), 0x008A0) #define MODULAR_FIA_MASK (1 << 4) -#define TC_LIVE_STATE_TBT(tc_port) (1 << ((tc_port) * 8 + 6)) -#define TC_LIVE_STATE_TC(tc_port) (1 << ((tc_port) * 8 + 5)) -#define DP_LANE_ASSIGNMENT_SHIFT(tc_port) ((tc_port) * 8) -#define DP_LANE_ASSIGNMENT_MASK(tc_port) (0xf << ((tc_port) * 8)) -#define DP_LANE_ASSIGNMENT(tc_port, x) ((x) << ((tc_port) * 8)) +#define TC_LIVE_STATE_TBT(idx) (1 << ((idx) * 8 + 6)) +#define TC_LIVE_STATE_TC(idx) (1 << ((idx) * 8 + 5)) +#define DP_LANE_ASSIGNMENT_SHIFT(idx) ((idx) * 8) +#define DP_LANE_ASSIGNMENT_MASK(idx) (0xf << ((idx) * 8)) +#define DP_LANE_ASSIGNMENT(idx, x) ((x) << ((idx) * 8)) #define PORT_TX_DFLEXDPPMS(fia) _MMIO_FIA((fia), 0x00890) -#define DP_PHY_MODE_STATUS_COMPLETED(tc_port) (1 << (tc_port)) +#define DP_PHY_MODE_STATUS_COMPLETED(idx) (1 << (idx)) #define PORT_TX_DFLEXDPCSSS(fia) _MMIO_FIA((fia), 0x00894) -#define DP_PHY_MODE_STATUS_NOT_SAFE(tc_port) (1 << (tc_port)) +#define DP_PHY_MODE_STATUS_NOT_SAFE(idx) (1 << (idx)) /* This register controls the Display State Buffer (DSB) engines. */ #define _DSBSL_INSTANCE_BASE 0x70B00 From 57bd1798b1809a003c5354403a94f2ecb02bf3ae Mon Sep 17 00:00:00 2001 From: Clinton A Taylor Date: Fri, 20 Sep 2019 13:58:07 -0700 Subject: [PATCH 234/331] drm/i915/tgl/pll: Set update_active_dpll MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 24a7bfe0c2d7 ("drm/i915: Keep the TypeC port mode fixed when the port is active") added this new hook while in parallel TGL upstream was happening and this was missed. Without this driver will crash when TC DDI is added and driver is preparing to do a full modeset. Cc: Lucas De Marchi Cc: Imre Deak Reviewed-by: Lucas De Marchi Signed-off-by: Clinton A Taylor Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20190920205810.211048-4-jose.souza@intel.com --- drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 98288edf88f0e..84e734d448283 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -3479,6 +3479,7 @@ static const struct intel_dpll_mgr tgl_pll_mgr = { .dpll_info = tgl_plls, .get_dplls = icl_get_dplls, .put_dplls = icl_put_dplls, + .update_active_dpll = icl_update_active_dpll, .dump_hw_state = icl_dump_hw_state, }; From f15a4eb18264a6eba30fb32f21ae76b010a691d5 Mon Sep 17 00:00:00 2001 From: Vandita Kulkarni Date: Fri, 20 Sep 2019 13:58:08 -0700 Subject: [PATCH 235/331] drm/i915/tgl: Add dkl phy registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These are the registers needed to program Dekel phy. Some register definitions will be reused from MG PHY definitions, so adding a comment on those. Bspec: 49295 Reviewed-by: Lucas De Marchi Signed-off-by: Vandita Kulkarni Signed-off-by: Clinton A Taylor Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20190920205810.211048-5-jose.souza@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 162 ++++++++++++++++++++++++++++++++ 1 file changed, 162 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 4236cb5d12df4..a69c19aae5bb5 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -10115,6 +10115,168 @@ enum skl_power_gate { _TGL_DPLL1_CFGCR1, \ _TGL_TBTPLL_CFGCR1) +#define _DKL_PHY1_BASE 0x168000 +#define _DKL_PHY2_BASE 0x169000 +#define _DKL_PHY3_BASE 0x16A000 +#define _DKL_PHY4_BASE 0x16B000 +#define _DKL_PHY5_BASE 0x16C000 +#define _DKL_PHY6_BASE 0x16D000 + +/* DEKEL PHY MMIO Address = Phy base + (internal address & ~index_mask) */ +#define _DKL_PLL_DIV0 0x200 +#define DKL_PLL_DIV0_INTEG_COEFF(x) ((x) << 16) +#define DKL_PLL_DIV0_INTEG_COEFF_MASK (0x1F << 16) +#define DKL_PLL_DIV0_PROP_COEFF(x) ((x) << 12) +#define DKL_PLL_DIV0_PROP_COEFF_MASK (0xF << 12) +#define DKL_PLL_DIV0_FBPREDIV_SHIFT (8) +#define DKL_PLL_DIV0_FBPREDIV(x) ((x) << DKL_PLL_DIV0_FBPREDIV_SHIFT) +#define DKL_PLL_DIV0_FBPREDIV_MASK (0xF << DKL_PLL_DIV0_FBPREDIV_SHIFT) +#define DKL_PLL_DIV0_FBDIV_INT(x) ((x) << 0) +#define DKL_PLL_DIV0_FBDIV_INT_MASK (0xFF << 0) +#define DKL_PLL_DIV0(tc_port) _MMIO(_PORT(tc_port, _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_PLL_DIV0) + +#define _DKL_PLL_DIV1 0x204 +#define DKL_PLL_DIV1_IREF_TRIM(x) ((x) << 16) +#define DKL_PLL_DIV1_IREF_TRIM_MASK (0x1F << 16) +#define DKL_PLL_DIV1_TDC_TARGET_CNT(x) ((x) << 0) +#define DKL_PLL_DIV1_TDC_TARGET_CNT_MASK (0xFF << 0) +#define DKL_PLL_DIV1(tc_port) _MMIO(_PORT(tc_port, _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_PLL_DIV1) + +#define _DKL_PLL_SSC 0x210 +#define DKL_PLL_SSC_IREF_NDIV_RATIO(x) ((x) << 29) +#define DKL_PLL_SSC_IREF_NDIV_RATIO_MASK (0x7 << 29) +#define DKL_PLL_SSC_STEP_LEN(x) ((x) << 16) +#define DKL_PLL_SSC_STEP_LEN_MASK (0xFF << 16) +#define DKL_PLL_SSC_STEP_NUM(x) ((x) << 11) +#define DKL_PLL_SSC_STEP_NUM_MASK (0x7 << 11) +#define DKL_PLL_SSC_EN (1 << 9) +#define DKL_PLL_SSC(tc_port) _MMIO(_PORT(tc_port, _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_PLL_SSC) + +#define _DKL_PLL_BIAS 0x214 +#define DKL_PLL_BIAS_FRAC_EN_H (1 << 30) +#define DKL_PLL_BIAS_FBDIV_SHIFT (8) +#define DKL_PLL_BIAS_FBDIV_FRAC(x) ((x) << DKL_PLL_BIAS_FBDIV_SHIFT) +#define DKL_PLL_BIAS_FBDIV_FRAC_MASK (0x3FFFFF << DKL_PLL_BIAS_FBDIV_SHIFT) +#define DKL_PLL_BIAS(tc_port) _MMIO(_PORT(tc_port, _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_PLL_BIAS) + +#define _DKL_PLL_TDC_COLDST_BIAS 0x218 +#define DKL_PLL_TDC_SSC_STEP_SIZE(x) ((x) << 8) +#define DKL_PLL_TDC_SSC_STEP_SIZE_MASK (0xFF << 8) +#define DKL_PLL_TDC_FEED_FWD_GAIN(x) ((x) << 0) +#define DKL_PLL_TDC_FEED_FWD_GAIN_MASK (0xFF << 0) +#define DKL_PLL_TDC_COLDST_BIAS(tc_port) _MMIO(_PORT(tc_port, \ + _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_PLL_TDC_COLDST_BIAS) + +#define _DKL_REFCLKIN_CTL 0x12C +/* Bits are the same as MG_REFCLKIN_CTL */ +#define DKL_REFCLKIN_CTL(tc_port) _MMIO(_PORT(tc_port, \ + _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_REFCLKIN_CTL) + +#define _DKL_CLKTOP2_HSCLKCTL 0xD4 +/* Bits are the same as MG_CLKTOP2_HSCLKCTL */ +#define DKL_CLKTOP2_HSCLKCTL(tc_port) _MMIO(_PORT(tc_port, \ + _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_CLKTOP2_HSCLKCTL) + +#define _DKL_CLKTOP2_CORECLKCTL1 0xD8 +/* Bits are the same as MG_CLKTOP2_CORECLKCTL1 */ +#define DKL_CLKTOP2_CORECLKCTL1(tc_port) _MMIO(_PORT(tc_port, \ + _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_CLKTOP2_CORECLKCTL1) + +#define _DKL_TX_DPCNTL0 0x2C0 +#define DKL_TX_PRESHOOT_COEFF(x) ((x) << 13) +#define DKL_TX_PRESHOOT_COEFF_MASK (0x1f << 13) +#define DKL_TX_DE_EMPHASIS_COEFF(x) ((x) << 8) +#define DKL_TX_DE_EMPAHSIS_COEFF_MASK (0x1f << 8) +#define DKL_TX_VSWING_CONTROL(x) ((x) << 0) +#define DKL_TX_VSWING_CONTROL_MASK (0x7 << 0) +#define DKL_TX_DPCNTL0(tc_port) _MMIO(_PORT(tc_port, \ + _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_TX_DPCNTL0) + +#define _DKL_TX_DPCNTL1 0x2C4 +/* Bits are the same as DKL_TX_DPCNTRL0 */ +#define DKL_TX_DPCNTL1(tc_port) _MMIO(_PORT(tc_port, \ + _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_TX_DPCNTL1) + +#define _DKL_TX_DPCNTL2 0x2C8 +#define DKL_TX_DP20BITMODE (1 << 2) +#define DKL_TX_DPCNTL2(tc_port) _MMIO(_PORT(tc_port, \ + _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_TX_DPCNTL2) + +#define _DKL_TX_FW_CALIB 0x2F8 +#define DKL_TX_CFG_DISABLE_WAIT_INIT (1 << 7) +#define DKL_TX_FW_CALIB(tc_port) _MMIO(_PORT(tc_port, \ + _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_TX_FW_CALIB) + +#define _DKL_TX_DW17 0xDC4 +#define DKL_TX_DW17(tc_port) _MMIO(_PORT(tc_port, \ + _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_TX_DW17) + +#define _DKL_TX_DW18 0xDC8 +#define DKL_TX_DW18(tc_port) _MMIO(_PORT(tc_port, \ + _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_TX_DW18) + +#define _DKL_DP_MODE 0xA0 +#define DKL_DP_MODE_CFG_GAONPWR_GATING (1 << 1) +#define DKL_DP_MODE_CFG_DIGPWR_GATING (1 << 2) +#define DKL_DP_MODE_CFG_CLNPWR_GATING (1 << 3) +#define DKL_DP_MODE_CFG_TRPWR_GATING (1 << 4) +#define DKL_DP_MODE_CFG_TR2PWR_GATING (1 << 5) +#define DKL_DP_MODE_CFG_GATING_CTRL_MASK (0x1f << 1) +#define DKL_DP_MODE_CFG_DP_X1_MODE (1 << 6) +#define DKL_DP_MODE_CFG_DP_X2_MODE (1 << 7) +#define DKL_DP_MODE(tc_port) _MMIO(_PORT(tc_port, \ + _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_DP_MODE) + +#define _DKL_CMN_UC_DW27 0x36C +#define DKL_CMN_UC_DW27_UC_HEALTH (0x1 << 15) +#define DKL_CMN_UC_DW_27(tc_port) _MMIO(_PORT(tc_port, \ + _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_CMN_UC_DW27) + +/* + * Each Dekel PHY is addressed through a 4KB aperture. Each PHY has more than + * 4KB of register space, so a separate index is programmed in HIP_INDEX_REG0 + * or HIP_INDEX_REG1, based on the port number, to set the upper 2 address + * bits that point the 4KB window into the full PHY register space. + */ +#define _HIP_INDEX_REG0 0x1010A0 +#define _HIP_INDEX_REG1 0x1010A4 +#define HIP_INDEX_REG(tc_port) _MMIO((tc_port) < 4 ? _HIP_INDEX_REG0 \ + : _HIP_INDEX_REG1) +#define _HIP_INDEX_SHIFT(tc_port) (8 * ((tc_port) % 4)) +#define HIP_INDEX_VAL(tc_port, val) ((val) << _HIP_INDEX_SHIFT(tc_port)) + /* BXT display engine PLL */ #define BXT_DE_PLL_CTL _MMIO(0x6d000) #define BXT_DE_PLL_RATIO(x) (x) /* {60,65,100} * 19.2MHz */ From 8aaf5cbda8f1684ffb7027724f67da086f46a0f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Fri, 20 Sep 2019 13:58:09 -0700 Subject: [PATCH 236/331] drm/i915/icl: Unify disable and enable phy clock gating functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding a enable parameters allow us to share most of the code between enable and disable functions. v3: Renamed icl_phy_clock_gating() to icl_phy_set_clock_gating() Reviewed-by: Lucas De Marchi Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20190920205810.211048-6-jose.souza@intel.com --- drivers/gpu/drm/i915/display/intel_ddi.c | 73 ++++++++---------------- 1 file changed, 23 insertions(+), 50 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index dfd6b064cbc3d..33cd766f9eeac 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -3033,67 +3033,40 @@ static void intel_ddi_clk_disable(struct intel_encoder *encoder) } } -static void icl_enable_phy_clock_gating(struct intel_digital_port *dig_port) +static void +icl_phy_set_clock_gating(struct intel_digital_port *dig_port, bool enable) { struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); enum port port = dig_port->base.port; enum tc_port tc_port = intel_port_to_tc(dev_priv, port); - u32 val; + u32 val, bits; int ln; if (tc_port == PORT_TC_NONE) return; - for (ln = 0; ln < 2; ln++) { - val = I915_READ(MG_DP_MODE(ln, port)); - val |= MG_DP_MODE_CFG_TR2PWR_GATING | - MG_DP_MODE_CFG_TRPWR_GATING | - MG_DP_MODE_CFG_CLNPWR_GATING | - MG_DP_MODE_CFG_DIGPWR_GATING | - MG_DP_MODE_CFG_GAONPWR_GATING; - I915_WRITE(MG_DP_MODE(ln, port), val); - } - - val = I915_READ(MG_MISC_SUS0(tc_port)); - val |= MG_MISC_SUS0_SUSCLK_DYNCLKGATE_MODE(3) | - MG_MISC_SUS0_CFG_TR2PWR_GATING | - MG_MISC_SUS0_CFG_CL2PWR_GATING | - MG_MISC_SUS0_CFG_GAONPWR_GATING | - MG_MISC_SUS0_CFG_TRPWR_GATING | - MG_MISC_SUS0_CFG_CL1PWR_GATING | - MG_MISC_SUS0_CFG_DGPWR_GATING; - I915_WRITE(MG_MISC_SUS0(tc_port), val); -} - -static void icl_disable_phy_clock_gating(struct intel_digital_port *dig_port) -{ - struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); - enum port port = dig_port->base.port; - enum tc_port tc_port = intel_port_to_tc(dev_priv, port); - u32 val; - int ln; - - if (tc_port == PORT_TC_NONE) - return; + bits = MG_DP_MODE_CFG_TR2PWR_GATING | MG_DP_MODE_CFG_TRPWR_GATING | + MG_DP_MODE_CFG_CLNPWR_GATING | MG_DP_MODE_CFG_DIGPWR_GATING | + MG_DP_MODE_CFG_GAONPWR_GATING; for (ln = 0; ln < 2; ln++) { val = I915_READ(MG_DP_MODE(ln, port)); - val &= ~(MG_DP_MODE_CFG_TR2PWR_GATING | - MG_DP_MODE_CFG_TRPWR_GATING | - MG_DP_MODE_CFG_CLNPWR_GATING | - MG_DP_MODE_CFG_DIGPWR_GATING | - MG_DP_MODE_CFG_GAONPWR_GATING); + if (enable) + val |= bits; + else + val &= ~bits; I915_WRITE(MG_DP_MODE(ln, port), val); } + bits = MG_MISC_SUS0_CFG_TR2PWR_GATING | MG_MISC_SUS0_CFG_CL2PWR_GATING | + MG_MISC_SUS0_CFG_GAONPWR_GATING | MG_MISC_SUS0_CFG_TRPWR_GATING | + MG_MISC_SUS0_CFG_CL1PWR_GATING | MG_MISC_SUS0_CFG_DGPWR_GATING; + val = I915_READ(MG_MISC_SUS0(tc_port)); - val &= ~(MG_MISC_SUS0_SUSCLK_DYNCLKGATE_MODE_MASK | - MG_MISC_SUS0_CFG_TR2PWR_GATING | - MG_MISC_SUS0_CFG_CL2PWR_GATING | - MG_MISC_SUS0_CFG_GAONPWR_GATING | - MG_MISC_SUS0_CFG_TRPWR_GATING | - MG_MISC_SUS0_CFG_CL1PWR_GATING | - MG_MISC_SUS0_CFG_DGPWR_GATING); + if (enable) + val |= (bits | MG_MISC_SUS0_SUSCLK_DYNCLKGATE_MODE(3)); + else + val &= ~(bits | MG_MISC_SUS0_SUSCLK_DYNCLKGATE_MODE_MASK); I915_WRITE(MG_MISC_SUS0(tc_port), val); } @@ -3258,7 +3231,7 @@ static void tgl_ddi_pre_enable_dp(struct intel_encoder *encoder, intel_ddi_config_transcoder_func(crtc_state); /* 7.d */ - icl_disable_phy_clock_gating(dig_port); + icl_phy_set_clock_gating(dig_port, false); /* 7.e */ icl_ddi_vswing_sequence(encoder, crtc_state->port_clock, level, @@ -3328,7 +3301,7 @@ static void hsw_ddi_pre_enable_dp(struct intel_encoder *encoder, dig_port->ddi_io_power_domain); icl_program_mg_dp_mode(dig_port); - icl_disable_phy_clock_gating(dig_port); + icl_phy_set_clock_gating(dig_port, false); if (INTEL_GEN(dev_priv) >= 11) icl_ddi_vswing_sequence(encoder, crtc_state->port_clock, @@ -3361,7 +3334,7 @@ static void hsw_ddi_pre_enable_dp(struct intel_encoder *encoder, intel_ddi_enable_fec(encoder, crtc_state); - icl_enable_phy_clock_gating(dig_port); + icl_phy_set_clock_gating(dig_port, true); if (!is_mst) intel_ddi_enable_pipe_clock(crtc_state); @@ -3398,7 +3371,7 @@ static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain); icl_program_mg_dp_mode(dig_port); - icl_disable_phy_clock_gating(dig_port); + icl_phy_set_clock_gating(dig_port, false); if (INTEL_GEN(dev_priv) >= 11) icl_ddi_vswing_sequence(encoder, crtc_state->port_clock, @@ -3410,7 +3383,7 @@ static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, else intel_prepare_hdmi_ddi_buffers(encoder, level); - icl_enable_phy_clock_gating(dig_port); + icl_phy_set_clock_gating(dig_port, true); if (IS_GEN9_BC(dev_priv)) skl_ddi_set_iboost(encoder, level, INTEL_OUTPUT_HDMI); From 27ffe6e570aa93e8389f7b0299a329e51d02843c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Fri, 20 Sep 2019 13:58:10 -0700 Subject: [PATCH 237/331] drm/i915/tgl: Check the UC health of tc controllers after power on MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New step added for TGL, required for us to check the TC microcontroller health after power on TC aux. BSpec: 49294 Reviewed-by: Imre Deak Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20190920205810.211048-7-jose.souza@intel.com --- drivers/gpu/drm/i915/display/intel_display_power.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index ce88a27229ef5..f1186bc23542f 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -562,6 +562,8 @@ static void icl_tc_port_assert_ref_held(struct drm_i915_private *dev_priv, #endif +#define TGL_AUX_PW_TO_TC_PORT(pw_idx) ((pw_idx) - TGL_PW_CTL_IDX_AUX_TC1) + static void icl_tc_phy_aux_power_well_enable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) @@ -578,6 +580,17 @@ icl_tc_phy_aux_power_well_enable(struct drm_i915_private *dev_priv, I915_WRITE(DP_AUX_CH_CTL(aux_ch), val); hsw_power_well_enable(dev_priv, power_well); + + if (INTEL_GEN(dev_priv) >= 12 && !power_well->desc->hsw.is_tc_tbt) { + enum tc_port tc_port; + + tc_port = TGL_AUX_PW_TO_TC_PORT(power_well->desc->hsw.idx); + I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, 0x2)); + + if (intel_de_wait_for_set(dev_priv, DKL_CMN_UC_DW_27(tc_port), + DKL_CMN_UC_DW27_UC_HEALTH, 1)) + DRM_WARN("Timeout waiting TC uC health\n"); + } } static void From b647c7df01b75761b4c0b1cb6f4841088c0b1121 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Sep 2019 16:28:42 +0100 Subject: [PATCH 238/331] drm/i915: Fixup preempt-to-busy vs resubmission of a virtual request As preempt-to-busy leaves the request on the HW as the resubmission is processed, that request may complete in the background and even cause a second virtual request to enter queue. This second virtual request breaks our "single request in the virtual pipeline" assumptions. Furthermore, as the virtual request may be completed and retired, we lose the reference the virtual engine assumes is held. Normally, just removing the request from the scheduler queue removes it from the engine, but the virtual engine keeps track of its singleton request via its ve->request. This pointer needs protecting with a reference. v2: Drop unnecessary motion of rq->engine = owner Fixes: 22b7a426bbe1 ("drm/i915/execlists: Preempt-to-busy") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190923152844.8914-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 32 +++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index bff4c6a735cfc..114819fb5a247 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1256,6 +1256,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) submit = true; last = rq; } + i915_request_put(rq); /* * Hmm, we have a bunch of virtual engine requests, @@ -2556,6 +2557,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) rq->engine = engine; __i915_request_submit(rq); + i915_request_put(rq); ve->base.execlists.queue_priority_hint = INT_MIN; } @@ -3809,6 +3811,8 @@ static void virtual_submission_tasklet(unsigned long data) static void virtual_submit_request(struct i915_request *rq) { struct virtual_engine *ve = to_virtual_engine(rq->engine); + struct i915_request *old; + unsigned long flags; GEM_TRACE("%s: rq=%llx:%lld\n", ve->base.name, @@ -3817,15 +3821,31 @@ static void virtual_submit_request(struct i915_request *rq) GEM_BUG_ON(ve->base.submit_request != virtual_submit_request); - GEM_BUG_ON(ve->request); - GEM_BUG_ON(!list_empty(virtual_queue(ve))); + spin_lock_irqsave(&ve->base.active.lock, flags); + + old = ve->request; + if (old) { /* background completion event from preempt-to-busy */ + GEM_BUG_ON(!i915_request_completed(old)); + __i915_request_submit(old); + i915_request_put(old); + } - ve->base.execlists.queue_priority_hint = rq_prio(rq); - WRITE_ONCE(ve->request, rq); + if (i915_request_completed(rq)) { + __i915_request_submit(rq); - list_move_tail(&rq->sched.link, virtual_queue(ve)); + ve->base.execlists.queue_priority_hint = INT_MIN; + ve->request = NULL; + } else { + ve->base.execlists.queue_priority_hint = rq_prio(rq); + ve->request = i915_request_get(rq); + + GEM_BUG_ON(!list_empty(virtual_queue(ve))); + list_move_tail(&rq->sched.link, virtual_queue(ve)); + + tasklet_schedule(&ve->base.execlists.tasklet); + } - tasklet_schedule(&ve->base.execlists.tasklet); + spin_unlock_irqrestore(&ve->base.active.lock, flags); } static struct ve_bond * From cb2377a919bbe8107af269c5a31a8d5cfb27d867 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Sep 2019 16:28:43 +0100 Subject: [PATCH 239/331] drm/i915: Fixup preempt-to-busy vs reset of a virtual request Due to the nature of preempt-to-busy the execlists active tracking and the schedule queue may become temporarily desync'ed (between resubmission to HW and its ack from HW). This means that we may have unwound a request and passed it back to the virtual engine, but it is still inflight on the HW and may even result in a GPU hang. If we detect that GPU hang and try to reset, the hanging request->engine will no longer match the current engine, which means that the request is not on the execlists active list and we should not try to find an older incomplete request. Given that we have deduced this must be a request on a virtual engine, it is the single active request in the context and so must be guilty (as the context is still inflight, it is prevented from being executed on another engine as we process the reset). Fixes: 22b7a426bbe1 ("drm/i915/execlists: Preempt-to-busy") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190923152844.8914-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 7 +++++-- drivers/gpu/drm/i915/gt/intel_reset.c | 4 +--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 114819fb5a247..322287943e337 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -2348,11 +2348,14 @@ static void reset_csb_pointers(struct intel_engine_cs *engine) static struct i915_request *active_request(struct i915_request *rq) { - const struct list_head * const list = - &i915_request_active_timeline(rq)->requests; const struct intel_context * const ce = rq->hw_context; struct i915_request *active = NULL; + struct list_head *list; + if (!i915_request_is_active(rq)) /* unwound, but incomplete! */ + return rq; + + list = &i915_request_active_timeline(rq)->requests; list_for_each_entry_from_reverse(rq, list, link) { if (i915_request_completed(rq)) break; diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 89048ca8924cd..ae68c3786f63d 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -42,11 +42,10 @@ static void engine_skip_context(struct i915_request *rq) struct intel_engine_cs *engine = rq->engine; struct i915_gem_context *hung_ctx = rq->gem_context; - lockdep_assert_held(&engine->active.lock); - if (!i915_request_is_active(rq)) return; + lockdep_assert_held(&engine->active.lock); list_for_each_entry_continue(rq, &engine->active.requests, sched.link) if (rq->gem_context == hung_ctx) i915_request_skip(rq, -EIO); @@ -123,7 +122,6 @@ void __i915_request_reset(struct i915_request *rq, bool guilty) rq->fence.seqno, yesno(guilty)); - lockdep_assert_held(&rq->engine->active.lock); GEM_BUG_ON(i915_request_completed(rq)); if (guilty) { From e2144503bf3b22275dd33cef2880e1cb5fb200c5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Sep 2019 16:28:44 +0100 Subject: [PATCH 240/331] drm/i915: Prevent bonded requests from overtaking each other on preemption Force bonded requests to run on distinct engines so that they cannot be shuffled onto the same engine where timeslicing will reverse the order. A bonded request will often wait on a semaphore signaled by its master, creating an implicit dependency -- if we ignore that implicit dependency and allow the bonded request to run on the same engine and before its master, we will cause a GPU hang. [Whether it will hang the GPU is debatable, we should keep on timeslicing and each timeslice should be "accidentally" counted as forward progress, in which case it should run but at one-half to one-third speed.] We can prevent this inversion by restricting which engines we allow ourselves to jump to upon preemption, i.e. baking in the arrangement established at first execution. (We should also consider capturing the implicit dependency using i915_sched_add_dependency(), but first we need to think about the constraints that requires on the execution/retirement ordering.) Fixes: 8ee36e048c98 ("drm/i915/execlists: Minimalistic timeslicing") References: ee1136908e9b ("drm/i915/execlists: Virtual engine bonding") Testcase: igt/gem_exec_balancer/bonded-slice Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190923152844.8914-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 322287943e337..6cfdc0f9f2b92 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -3869,18 +3869,22 @@ static void virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal) { struct virtual_engine *ve = to_virtual_engine(rq->engine); + intel_engine_mask_t allowed, exec; struct ve_bond *bond; + allowed = ~to_request(signal)->engine->mask; + bond = virtual_find_bond(ve, to_request(signal)->engine); - if (bond) { - intel_engine_mask_t old, new, cmp; + if (bond) + allowed &= bond->sibling_mask; - cmp = READ_ONCE(rq->execution_mask); - do { - old = cmp; - new = cmp & bond->sibling_mask; - } while ((cmp = cmpxchg(&rq->execution_mask, old, new)) != old); - } + /* Restrict the bonded request to run on only the available engines */ + exec = READ_ONCE(rq->execution_mask); + while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed)) + ; + + /* Prevent the master from being re-run on the bonded engines */ + to_request(signal)->execution_mask &= ~allowed; } struct intel_context * From 5028851cdfdf78dc22eacbc44a0ab0b3f599ee4a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 20 Sep 2019 13:18:21 +0100 Subject: [PATCH 241/331] drm/i915: Mark contents as dirty on a write fault MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since dropping the set-to-gtt-domain in commit a679f58d0510 ("drm/i915: Flush pages on acquisition"), we no longer mark the contents as dirty on a write fault. This has the issue of us then not marking the pages as dirty on releasing the buffer, which means the contents are not written out to the swap device (should we ever pick that buffer as a victim). Notably, this is visible in the dumb buffer interface used for cursors. Having updated the cursor contents via mmap, and swapped away, if the shrinker should evict the old cursor, upon next reuse, the cursor would be invisible. E.g. echo 80 > /proc/sys/kernel/sysrq ; echo f > /proc/sysrq-trigger Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111541 Fixes: a679f58d0510 ("drm/i915: Flush pages on acquisition") Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Ville Syrjälä Cc: # v5.2+ Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20190920121821.7223-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 1748e63156a24..860b751c51f19 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -319,7 +319,11 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) intel_wakeref_auto(&i915->ggtt.userfault_wakeref, msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)); - i915_vma_set_ggtt_write(vma); + if (write) { + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + i915_vma_set_ggtt_write(vma); + obj->mm.dirty = true; + } err_fence: i915_vma_unpin_fence(vma); From 1b74d46782d0615dace4729c22d1862a0355b124 Mon Sep 17 00:00:00 2001 From: Stanislav Lisovskiy Date: Fri, 20 Sep 2019 11:37:54 +0300 Subject: [PATCH 242/331] drm/i915: Add TigerLake bandwidth checking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added bandwidth calculation algorithm and checks, similar way as it was done for ICL, some constants were corrected according to BSpec 53998. v2: Start using same icl_get_bw_info function to avoid code duplication. Moved mpagesize to memory info related structure as it is now dependent on memory type. Fixed qi.t_bl field assignment. v3: Removed mpagesize as unused. Duplicate code and redundant blankline fixed. v4: Changed ordering of IS_GEN checks as agreed. Minor commit message fixes. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111600 Reviewed-by: James Ausmus Signed-off-by: Stanislav Lisovskiy Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190920083754.5920-1-stanislav.lisovskiy@intel.com --- drivers/gpu/drm/i915/display/intel_bw.c | 26 +++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index 688858ebe4d0a..cd58e47ab7b2a 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -56,7 +56,10 @@ static int icl_pcode_read_mem_global_info(struct drm_i915_private *dev_priv, qi->num_channels = (val & 0xf0) >> 4; qi->num_points = (val & 0xf00) >> 8; - qi->t_bl = qi->dram_type == INTEL_DRAM_DDR4 ? 4 : 8; + if (IS_GEN(dev_priv, 12)) + qi->t_bl = qi->dram_type == INTEL_DRAM_DDR4 ? 4 : 16; + else if (IS_GEN(dev_priv, 11)) + qi->t_bl = qi->dram_type == INTEL_DRAM_DDR4 ? 4 : 8; return 0; } @@ -132,20 +135,25 @@ static int icl_sagv_max_dclk(const struct intel_qgv_info *qi) } struct intel_sa_info { - u8 deburst, mpagesize, deprogbwlimit, displayrtids; + u16 displayrtids; + u8 deburst, deprogbwlimit; }; static const struct intel_sa_info icl_sa_info = { .deburst = 8, - .mpagesize = 16, .deprogbwlimit = 25, /* GB/s */ .displayrtids = 128, }; -static int icl_get_bw_info(struct drm_i915_private *dev_priv) +static const struct intel_sa_info tgl_sa_info = { + .deburst = 16, + .deprogbwlimit = 34, /* GB/s */ + .displayrtids = 256, +}; + +static int icl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel_sa_info *sa) { struct intel_qgv_info qi = {}; - const struct intel_sa_info *sa = &icl_sa_info; bool is_y_tile = true; /* assume y tile may be used */ int num_channels; int deinterleave; @@ -233,14 +241,16 @@ static unsigned int icl_max_bw(struct drm_i915_private *dev_priv, void intel_bw_init_hw(struct drm_i915_private *dev_priv) { - if (IS_GEN(dev_priv, 11)) - icl_get_bw_info(dev_priv); + if (IS_GEN(dev_priv, 12)) + icl_get_bw_info(dev_priv, &tgl_sa_info); + else if (IS_GEN(dev_priv, 11)) + icl_get_bw_info(dev_priv, &icl_sa_info); } static unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv, int num_planes) { - if (IS_GEN(dev_priv, 11)) + if (INTEL_GEN(dev_priv) >= 11) /* * FIXME with SAGV disabled maybe we can assume * point 1 will always be used? Seems to match From 7dc56af5260e958e36a08b2e0822029ddf765770 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 24 Sep 2019 15:59:50 +0100 Subject: [PATCH 243/331] drm/i915/selftests: Verify the LRC register layout between init and HW Before we submit the first context to HW, we need to construct a valid image of the register state. This layout is defined by the HW and should match the layout generated by HW when it saves the context image. Asserting that this should be equivalent should help avoid any undefined behaviour and verify that we haven't missed anything important! Of course, having insisted that the initial register state within the LRC should match that returned by HW, we need to ensure that it does. v2: Drop the RELATIVE_MMIO flag from gen11, we ignore it for constructing the lrc image. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Daniele Ceraolo Spurio Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190924145950.3011-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/gt/intel_lrc.c | 667 ++++++++++++------ drivers/gpu/drm/i915/gt/intel_lrc_reg.h | 62 +- drivers/gpu/drm/i915/gt/selftest_lrc.c | 142 ++++ drivers/gpu/drm/i915/i915_perf.c | 35 +- drivers/gpu/drm/i915/i915_perf.h | 5 +- .../drm/i915/selftests/i915_live_selftests.h | 1 + 7 files changed, 648 insertions(+), 266 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 4a34c4f62065d..f7ba0935ed670 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1115,7 +1115,7 @@ static int gen8_emit_rpcs_config(struct i915_request *rq, offset = i915_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE + - (CTX_R_PWR_CLK_STATE + 1) * 4; + CTX_R_PWR_CLK_STATE * 4; *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *cs++ = lower_32_bits(offset); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 6cfdc0f9f2b92..caac091b3eb95 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -230,9 +230,10 @@ static int __execlists_context_alloc(struct intel_context *ce, struct intel_engine_cs *engine); static void execlists_init_reg_state(u32 *reg_state, - struct intel_context *ce, - struct intel_engine_cs *engine, - struct intel_ring *ring); + const struct intel_context *ce, + const struct intel_engine_cs *engine, + const struct intel_ring *ring, + bool close); static void mark_eio(struct i915_request *rq) { @@ -471,6 +472,411 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) return desc; } +static u32 *set_offsets(u32 *regs, + const u8 *data, + const struct intel_engine_cs *engine) +#define NOP(x) (BIT(7) | (x)) +#define LRI(count, flags) ((flags) << 6 | (count)) +#define POSTED BIT(0) +#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200)) +#define REG16(x) \ + (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \ + (((x) >> 2) & 0x7f) +#define END() 0 +{ + const u32 base = engine->mmio_base; + + while (*data) { + u8 count, flags; + + if (*data & BIT(7)) { /* skip */ + regs += *data++ & ~BIT(7); + continue; + } + + count = *data & 0x3f; + flags = *data >> 6; + data++; + + *regs = MI_LOAD_REGISTER_IMM(count); + if (flags & POSTED) + *regs |= MI_LRI_FORCE_POSTED; + if (INTEL_GEN(engine->i915) >= 11) + *regs |= MI_LRI_CS_MMIO; + regs++; + + GEM_BUG_ON(!count); + do { + u32 offset = 0; + u8 v; + + do { + v = *data++; + offset <<= 7; + offset |= v & ~BIT(7); + } while (v & BIT(7)); + + *regs = base + (offset << 2); + regs += 2; + } while (--count); + } + + return regs; +} + +static const u8 gen8_xcs_offsets[] = { + NOP(1), + LRI(11, 0), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x11c), + REG(0x114), + REG(0x118), + + NOP(9), + LRI(9, 0), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + NOP(13), + LRI(2, 0), + REG16(0x200), + REG(0x028), + + END(), +}; + +static const u8 gen9_xcs_offsets[] = { + NOP(1), + LRI(14, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x11c), + REG(0x114), + REG(0x118), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + + NOP(3), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + NOP(13), + LRI(1, POSTED), + REG16(0x200), + + NOP(13), + LRI(44, POSTED), + REG(0x028), + REG(0x09c), + REG(0x0c0), + REG(0x178), + REG(0x17c), + REG16(0x358), + REG(0x170), + REG(0x150), + REG(0x154), + REG(0x158), + REG16(0x41c), + REG16(0x600), + REG16(0x604), + REG16(0x608), + REG16(0x60c), + REG16(0x610), + REG16(0x614), + REG16(0x618), + REG16(0x61c), + REG16(0x620), + REG16(0x624), + REG16(0x628), + REG16(0x62c), + REG16(0x630), + REG16(0x634), + REG16(0x638), + REG16(0x63c), + REG16(0x640), + REG16(0x644), + REG16(0x648), + REG16(0x64c), + REG16(0x650), + REG16(0x654), + REG16(0x658), + REG16(0x65c), + REG16(0x660), + REG16(0x664), + REG16(0x668), + REG16(0x66c), + REG16(0x670), + REG16(0x674), + REG16(0x678), + REG16(0x67c), + REG(0x068), + + END(), +}; + +static const u8 gen12_xcs_offsets[] = { + NOP(1), + LRI(13, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + + NOP(5), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + NOP(13), + LRI(2, POSTED), + REG16(0x200), + REG16(0x204), + + NOP(11), + LRI(50, POSTED), + REG16(0x588), + REG16(0x588), + REG16(0x588), + REG16(0x588), + REG16(0x588), + REG16(0x588), + REG(0x028), + REG(0x09c), + REG(0x0c0), + REG(0x178), + REG(0x17c), + REG16(0x358), + REG(0x170), + REG(0x150), + REG(0x154), + REG(0x158), + REG16(0x41c), + REG16(0x600), + REG16(0x604), + REG16(0x608), + REG16(0x60c), + REG16(0x610), + REG16(0x614), + REG16(0x618), + REG16(0x61c), + REG16(0x620), + REG16(0x624), + REG16(0x628), + REG16(0x62c), + REG16(0x630), + REG16(0x634), + REG16(0x638), + REG16(0x63c), + REG16(0x640), + REG16(0x644), + REG16(0x648), + REG16(0x64c), + REG16(0x650), + REG16(0x654), + REG16(0x658), + REG16(0x65c), + REG16(0x660), + REG16(0x664), + REG16(0x668), + REG16(0x66c), + REG16(0x670), + REG16(0x674), + REG16(0x678), + REG16(0x67c), + REG(0x068), + + END(), +}; + +static const u8 gen8_rcs_offsets[] = { + NOP(1), + LRI(14, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x11c), + REG(0x114), + REG(0x118), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + + NOP(3), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + NOP(13), + LRI(1, 0), + REG(0x0c8), + + END(), +}; + +static const u8 gen11_rcs_offsets[] = { + NOP(1), + LRI(15, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x11c), + REG(0x114), + REG(0x118), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + + NOP(1), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + LRI(1, POSTED), + REG(0x1b0), + + NOP(10), + LRI(1, 0), + REG(0x0c8), + + END(), +}; + +static const u8 gen12_rcs_offsets[] = { + NOP(1), + LRI(13, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + + NOP(5), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + LRI(3, POSTED), + REG(0x1b0), + REG16(0x5a8), + REG16(0x5ac), + + NOP(6), + LRI(1, 0), + REG(0x0c8), + + END(), +}; + +#undef END +#undef REG16 +#undef REG +#undef LRI +#undef NOP + +static const u8 *reg_offsets(const struct intel_engine_cs *engine) +{ + if (engine->class == RENDER_CLASS) { + if (INTEL_GEN(engine->i915) >= 12) + return gen12_rcs_offsets; + else if (INTEL_GEN(engine->i915) >= 11) + return gen11_rcs_offsets; + else + return gen8_rcs_offsets; + } else { + if (INTEL_GEN(engine->i915) >= 12) + return gen12_xcs_offsets; + else if (INTEL_GEN(engine->i915) >= 9) + return gen9_xcs_offsets; + else + return gen8_xcs_offsets; + } +} + static void unwind_wa_tail(struct i915_request *rq) { rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES); @@ -654,7 +1060,7 @@ static u64 execlists_update_context(const struct i915_request *rq) struct intel_context *ce = rq->hw_context; u64 desc; - ce->lrc_reg_state[CTX_RING_TAIL + 1] = + ce->lrc_reg_state[CTX_RING_TAIL] = intel_ring_set_tail(rq->ring, rq->tail); /* @@ -826,54 +1232,7 @@ static bool can_merge_rq(const struct i915_request *prev, static void virtual_update_register_offsets(u32 *regs, struct intel_engine_cs *engine) { - u32 base = engine->mmio_base; - - /* Refactor so that we only have one place that knows all the offsets! */ - GEM_WARN_ON(INTEL_GEN(engine->i915) >= 12); - - /* Must match execlists_init_reg_state()! */ - - /* Common part */ - regs[CTX_CONTEXT_CONTROL] = - i915_mmio_reg_offset(RING_CONTEXT_CONTROL(base)); - regs[CTX_RING_HEAD] = i915_mmio_reg_offset(RING_HEAD(base)); - regs[CTX_RING_TAIL] = i915_mmio_reg_offset(RING_TAIL(base)); - regs[CTX_RING_BUFFER_START] = i915_mmio_reg_offset(RING_START(base)); - regs[CTX_RING_BUFFER_CONTROL] = i915_mmio_reg_offset(RING_CTL(base)); - - regs[CTX_BB_HEAD_U] = i915_mmio_reg_offset(RING_BBADDR_UDW(base)); - regs[CTX_BB_HEAD_L] = i915_mmio_reg_offset(RING_BBADDR(base)); - regs[CTX_BB_STATE] = i915_mmio_reg_offset(RING_BBSTATE(base)); - - regs[CTX_SECOND_BB_HEAD_U] = - i915_mmio_reg_offset(RING_SBBADDR_UDW(base)); - regs[CTX_SECOND_BB_HEAD_L] = i915_mmio_reg_offset(RING_SBBADDR(base)); - regs[CTX_SECOND_BB_STATE] = i915_mmio_reg_offset(RING_SBBSTATE(base)); - - /* PPGTT part */ - regs[CTX_CTX_TIMESTAMP] = - i915_mmio_reg_offset(RING_CTX_TIMESTAMP(base)); - - regs[CTX_PDP3_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 3)); - regs[CTX_PDP3_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 3)); - regs[CTX_PDP2_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 2)); - regs[CTX_PDP2_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 2)); - regs[CTX_PDP1_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 1)); - regs[CTX_PDP1_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 1)); - regs[CTX_PDP0_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0)); - regs[CTX_PDP0_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0)); - - if (engine->class == RENDER_CLASS) { - regs[CTX_RCS_INDIRECT_CTX] = - i915_mmio_reg_offset(RING_INDIRECT_CTX(base)); - regs[CTX_RCS_INDIRECT_CTX_OFFSET] = - i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(base)); - regs[CTX_BB_PER_CTX_PTR] = - i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(base)); - - regs[CTX_R_PWR_CLK_STATE] = - i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE); - } + set_offsets(regs, reg_offsets(engine), engine); } static bool virtual_matches(const struct virtual_engine *ve, @@ -1738,8 +2097,8 @@ static void execlists_context_unpin(struct intel_context *ce) } static void -__execlists_update_reg_state(struct intel_context *ce, - struct intel_engine_cs *engine) +__execlists_update_reg_state(const struct intel_context *ce, + const struct intel_engine_cs *engine) { struct intel_ring *ring = ce->ring; u32 *regs = ce->lrc_reg_state; @@ -1747,16 +2106,16 @@ __execlists_update_reg_state(struct intel_context *ce, GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head)); GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail)); - regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(ring->vma); - regs[CTX_RING_HEAD + 1] = ring->head; - regs[CTX_RING_TAIL + 1] = ring->tail; + regs[CTX_RING_BUFFER_START] = i915_ggtt_offset(ring->vma); + regs[CTX_RING_HEAD] = ring->head; + regs[CTX_RING_TAIL] = ring->tail; /* RPCS */ if (engine->class == RENDER_CLASS) { - regs[CTX_R_PWR_CLK_STATE + 1] = + regs[CTX_R_PWR_CLK_STATE] = intel_sseu_make_rpcs(engine->i915, &ce->sseu); - i915_oa_init_reg_state(engine, ce, regs); + i915_oa_init_reg_state(ce, engine); } } @@ -2465,7 +2824,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, engine->context_size - PAGE_SIZE); } - execlists_init_reg_state(regs, ce, engine, ce->ring); + execlists_init_reg_state(regs, ce, engine, ce->ring, false); out_replay: GEM_TRACE("%s replay {head:%04x, tail:%04x\n", @@ -3243,7 +3602,7 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine) return 0; } -static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine) +static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine) { u32 indirect_ctx_offset; @@ -3278,75 +3637,48 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine) static void init_common_reg_state(u32 * const regs, - struct i915_ppgtt * const ppgtt, - struct intel_engine_cs *engine, - struct intel_ring *ring) + const struct intel_engine_cs *engine, + const struct intel_ring *ring) { - const u32 base = engine->mmio_base; - - CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base), + regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) | - _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH)); - if (INTEL_GEN(engine->i915) < 11) { - regs[CTX_CONTEXT_CONTROL + 1] |= + _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH); + if (INTEL_GEN(engine->i915) < 11) + regs[CTX_CONTEXT_CONTROL] |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT | CTX_CTRL_RS_CTX_ENABLE); - } - CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0); - CTX_REG(regs, CTX_RING_TAIL, RING_TAIL(base), 0); - CTX_REG(regs, CTX_RING_BUFFER_START, RING_START(base), 0); - CTX_REG(regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base), - RING_CTL_SIZE(ring->size) | RING_VALID); - CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0); - CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0); - CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT); + + regs[CTX_RING_BUFFER_CONTROL] = RING_CTL_SIZE(ring->size) | RING_VALID; + regs[CTX_BB_STATE] = RING_BB_PPGTT; } static void init_wa_bb_reg_state(u32 * const regs, - struct intel_engine_cs *engine, + const struct intel_engine_cs *engine, u32 pos_bb_per_ctx) { - struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx; - const u32 base = engine->mmio_base; - const u32 pos_indirect_ctx = pos_bb_per_ctx + 2; - const u32 pos_indirect_ctx_offset = pos_indirect_ctx + 2; + const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx; + + if (wa_ctx->per_ctx.size) { + const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); + + regs[pos_bb_per_ctx] = + (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01; + } - CTX_REG(regs, pos_indirect_ctx, RING_INDIRECT_CTX(base), 0); - CTX_REG(regs, pos_indirect_ctx_offset, - RING_INDIRECT_CTX_OFFSET(base), 0); if (wa_ctx->indirect_ctx.size) { const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); - regs[pos_indirect_ctx + 1] = + regs[pos_bb_per_ctx + 2] = (ggtt_offset + wa_ctx->indirect_ctx.offset) | (wa_ctx->indirect_ctx.size / CACHELINE_BYTES); - regs[pos_indirect_ctx_offset + 1] = + regs[pos_bb_per_ctx + 4] = intel_lr_indirect_ctx_offset(engine) << 6; } - - CTX_REG(regs, pos_bb_per_ctx, RING_BB_PER_CTX_PTR(base), 0); - if (wa_ctx->per_ctx.size) { - const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); - - regs[pos_bb_per_ctx + 1] = - (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01; - } } -static void init_ppgtt_reg_state(u32 *regs, u32 base, - struct i915_ppgtt *ppgtt) +static void init_ppgtt_reg_state(u32 *regs, const struct i915_ppgtt *ppgtt) { - /* PDP values well be assigned later if needed */ - CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0); - CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0); - CTX_REG(regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(base, 2), 0); - CTX_REG(regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(base, 2), 0); - CTX_REG(regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(base, 1), 0); - CTX_REG(regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(base, 1), 0); - CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(base, 0), 0); - CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(base, 0), 0); - if (i915_vm_is_4lvl(&ppgtt->vm)) { /* 64b PPGTT (48bit canonical) * PDP0_DESCRIPTOR contains the base address to PML4 and @@ -3369,91 +3701,11 @@ static struct i915_ppgtt *vm_alias(struct i915_address_space *vm) return i915_vm_to_ppgtt(vm); } -static void gen8_init_reg_state(u32 * const regs, - struct intel_context *ce, - struct intel_engine_cs *engine, - struct intel_ring *ring) -{ - struct i915_ppgtt * const ppgtt = vm_alias(ce->vm); - const bool rcs = engine->class == RENDER_CLASS; - const u32 base = engine->mmio_base; - const u32 lri_base = - intel_engine_has_relative_mmio(engine) ? MI_LRI_CS_MMIO : 0; - - regs[CTX_LRI_HEADER_0] = - MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) | - MI_LRI_FORCE_POSTED | - lri_base; - - init_common_reg_state(regs, ppgtt, engine, ring); - CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0); - CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0); - CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0); - if (rcs) - init_wa_bb_reg_state(regs, engine, CTX_BB_PER_CTX_PTR); - - regs[CTX_LRI_HEADER_1] = - MI_LOAD_REGISTER_IMM(9) | - MI_LRI_FORCE_POSTED | - lri_base; - - CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0); - - init_ppgtt_reg_state(regs, base, ppgtt); - - if (rcs) { - regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1) | lri_base; - CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0); - } - - regs[CTX_END] = MI_BATCH_BUFFER_END; - if (INTEL_GEN(engine->i915) >= 10) - regs[CTX_END] |= BIT(0); -} - -static void gen12_init_reg_state(u32 * const regs, - struct intel_context *ce, - struct intel_engine_cs *engine, - struct intel_ring *ring) -{ - struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(ce->vm); - const bool rcs = engine->class == RENDER_CLASS; - const u32 base = engine->mmio_base; - const u32 lri_base = - intel_engine_has_relative_mmio(engine) ? MI_LRI_CS_MMIO : 0; - - regs[CTX_LRI_HEADER_0] = - MI_LOAD_REGISTER_IMM(rcs ? 11 : 9) | - MI_LRI_FORCE_POSTED | - lri_base; - - init_common_reg_state(regs, ppgtt, engine, ring); - - /* We want ctx_ptr for all engines to be set */ - init_wa_bb_reg_state(regs, engine, GEN12_CTX_BB_PER_CTX_PTR); - - regs[CTX_LRI_HEADER_1] = - MI_LOAD_REGISTER_IMM(9) | - MI_LRI_FORCE_POSTED | - lri_base; - - CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0); - - init_ppgtt_reg_state(regs, base, ppgtt); - - if (rcs) { - regs[GEN12_CTX_LRI_HEADER_3] = - MI_LOAD_REGISTER_IMM(1) | lri_base; - CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0); - - /* TODO: oa_init_reg_state ? */ - } -} - static void execlists_init_reg_state(u32 *regs, - struct intel_context *ce, - struct intel_engine_cs *engine, - struct intel_ring *ring) + const struct intel_context *ce, + const struct intel_engine_cs *engine, + const struct intel_ring *ring, + bool close) { /* * A context is actually a big batch buffer with several @@ -3465,10 +3717,21 @@ static void execlists_init_reg_state(u32 *regs, * * Must keep consistent with virtual_update_register_offsets(). */ - if (INTEL_GEN(engine->i915) >= 12) - gen12_init_reg_state(regs, ce, engine, ring); - else - gen8_init_reg_state(regs, ce, engine, ring); + u32 *bbe = set_offsets(regs, reg_offsets(engine), engine); + + if (close) { /* Close the batch; used mainly by live_lrc_layout() */ + *bbe = MI_BATCH_BUFFER_END; + if (INTEL_GEN(engine->i915) >= 10) + *bbe |= BIT(0); + } + + init_common_reg_state(regs, engine, ring); + init_ppgtt_reg_state(regs, vm_alias(ce->vm)); + + init_wa_bb_reg_state(regs, engine, + INTEL_GEN(engine->i915) >= 12 ? + GEN12_CTX_BB_PER_CTX_PTR : + CTX_BB_PER_CTX_PTR); } static int @@ -3477,6 +3740,7 @@ populate_lr_context(struct intel_context *ce, struct intel_engine_cs *engine, struct intel_ring *ring) { + bool inhibit = true; void *vaddr; u32 *regs; int ret; @@ -3508,14 +3772,15 @@ populate_lr_context(struct intel_context *ce, memcpy(vaddr + start, defaults + start, engine->context_size); i915_gem_object_unpin_map(engine->default_state); + inhibit = false; } /* The second page of the context object contains some fields which must * be set up prior to the first execution. */ regs = vaddr + LRC_STATE_PN * PAGE_SIZE; - execlists_init_reg_state(regs, ce, engine, ring); - if (!engine->default_state) - regs[CTX_CONTEXT_CONTROL + 1] |= + execlists_init_reg_state(regs, ce, engine, ring, inhibit); + if (inhibit) + regs[CTX_CONTEXT_CONTROL] |= _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); ret = 0; @@ -4212,7 +4477,7 @@ void intel_lr_context_reset(struct intel_engine_cs *engine, engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, engine->context_size - PAGE_SIZE); } - execlists_init_reg_state(regs, ce, engine, ce->ring); + execlists_init_reg_state(regs, ce, engine, ce->ring, false); } /* Rerun the request; its payload has been neutered (if guilty). */ diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h index 7e773e74a3fe0..06ab0276e10e3 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h @@ -10,60 +10,40 @@ #include /* GEN8 to GEN11 Reg State Context */ -#define CTX_LRI_HEADER_0 0x01 -#define CTX_CONTEXT_CONTROL 0x02 -#define CTX_RING_HEAD 0x04 -#define CTX_RING_TAIL 0x06 -#define CTX_RING_BUFFER_START 0x08 -#define CTX_RING_BUFFER_CONTROL 0x0a -#define CTX_BB_HEAD_U 0x0c -#define CTX_BB_HEAD_L 0x0e -#define CTX_BB_STATE 0x10 -#define CTX_SECOND_BB_HEAD_U 0x12 -#define CTX_SECOND_BB_HEAD_L 0x14 -#define CTX_SECOND_BB_STATE 0x16 -#define CTX_BB_PER_CTX_PTR 0x18 -#define CTX_RCS_INDIRECT_CTX 0x1a -#define CTX_RCS_INDIRECT_CTX_OFFSET 0x1c -#define CTX_LRI_HEADER_1 0x21 -#define CTX_CTX_TIMESTAMP 0x22 -#define CTX_PDP3_UDW 0x24 -#define CTX_PDP3_LDW 0x26 -#define CTX_PDP2_UDW 0x28 -#define CTX_PDP2_LDW 0x2a -#define CTX_PDP1_UDW 0x2c -#define CTX_PDP1_LDW 0x2e -#define CTX_PDP0_UDW 0x30 -#define CTX_PDP0_LDW 0x32 -#define CTX_LRI_HEADER_2 0x41 -#define CTX_R_PWR_CLK_STATE 0x42 -#define CTX_END 0x44 +#define CTX_CONTEXT_CONTROL (0x02 + 1) +#define CTX_RING_HEAD (0x04 + 1) +#define CTX_RING_TAIL (0x06 + 1) +#define CTX_RING_BUFFER_START (0x08 + 1) +#define CTX_RING_BUFFER_CONTROL (0x0a + 1) +#define CTX_BB_STATE (0x10 + 1) +#define CTX_BB_PER_CTX_PTR (0x18 + 1) +#define CTX_PDP3_UDW (0x24 + 1) +#define CTX_PDP3_LDW (0x26 + 1) +#define CTX_PDP2_UDW (0x28 + 1) +#define CTX_PDP2_LDW (0x2a + 1) +#define CTX_PDP1_UDW (0x2c + 1) +#define CTX_PDP1_LDW (0x2e + 1) +#define CTX_PDP0_UDW (0x30 + 1) +#define CTX_PDP0_LDW (0x32 + 1) +#define CTX_R_PWR_CLK_STATE (0x42 + 1) #define GEN9_CTX_RING_MI_MODE 0x54 /* GEN12+ Reg State Context */ -#define GEN12_CTX_BB_PER_CTX_PTR 0x12 -#define GEN12_CTX_LRI_HEADER_3 0x41 - -#define CTX_REG(reg_state, pos, reg, val) do { \ - u32 *reg_state__ = (reg_state); \ - const u32 pos__ = (pos); \ - (reg_state__)[(pos__) + 0] = i915_mmio_reg_offset(reg); \ - (reg_state__)[(pos__) + 1] = (val); \ -} while (0) +#define GEN12_CTX_BB_PER_CTX_PTR (0x12 + 1) #define ASSIGN_CTX_PDP(ppgtt, reg_state, n) do { \ u32 *reg_state__ = (reg_state); \ const u64 addr__ = i915_page_dir_dma_addr((ppgtt), (n)); \ - (reg_state__)[CTX_PDP ## n ## _UDW + 1] = upper_32_bits(addr__); \ - (reg_state__)[CTX_PDP ## n ## _LDW + 1] = lower_32_bits(addr__); \ + (reg_state__)[CTX_PDP ## n ## _UDW] = upper_32_bits(addr__); \ + (reg_state__)[CTX_PDP ## n ## _LDW] = lower_32_bits(addr__); \ } while (0) #define ASSIGN_CTX_PML4(ppgtt, reg_state) do { \ u32 *reg_state__ = (reg_state); \ const u64 addr__ = px_dma(ppgtt->pd); \ - (reg_state__)[CTX_PDP0_UDW + 1] = upper_32_bits(addr__); \ - (reg_state__)[CTX_PDP0_LDW + 1] = lower_32_bits(addr__); \ + (reg_state__)[CTX_PDP0_UDW] = upper_32_bits(addr__); \ + (reg_state__)[CTX_PDP0_LDW] = lower_32_bits(addr__); \ } while (0) #define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17 diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 93a871bfd95d1..22ea2e7470643 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -2201,3 +2201,145 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) return i915_live_subtests(tests, i915); } + +static void hexdump(const void *buf, size_t len) +{ + const size_t rowsize = 8 * sizeof(u32); + const void *prev = NULL; + bool skip = false; + size_t pos; + + for (pos = 0; pos < len; pos += rowsize) { + char line[128]; + + if (prev && !memcmp(prev, buf + pos, rowsize)) { + if (!skip) { + pr_info("*\n"); + skip = true; + } + continue; + } + + WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos, + rowsize, sizeof(u32), + line, sizeof(line), + false) >= sizeof(line)); + pr_info("[%04zx] %s\n", pos, line); + + prev = buf + pos; + skip = false; + } +} + +static int live_lrc_layout(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + u32 *mem; + int err; + + /* + * Check the registers offsets we use to create the initial reg state + * match the layout saved by HW. + */ + + mem = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!mem) + return -ENOMEM; + + err = 0; + for_each_engine(engine, gt->i915, id) { + u32 *hw, *lrc; + int dw; + + if (!engine->default_state) + continue; + + hw = i915_gem_object_pin_map(engine->default_state, + I915_MAP_WB); + if (IS_ERR(hw)) { + err = PTR_ERR(hw); + break; + } + hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw); + + lrc = memset(mem, 0, PAGE_SIZE); + execlists_init_reg_state(lrc, + engine->kernel_context, + engine, + engine->kernel_context->ring, + true); + + dw = 0; + do { + u32 lri = hw[dw]; + + if (lri == 0) { + dw++; + continue; + } + + if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { + pr_err("%s: Expected LRI command at dword %d, found %08x\n", + engine->name, dw, lri); + err = -EINVAL; + break; + } + + if (lrc[dw] != lri) { + pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n", + engine->name, dw, lri, lrc[dw]); + err = -EINVAL; + break; + } + + lri &= 0x7f; + lri++; + dw++; + + while (lri) { + if (hw[dw] != lrc[dw]) { + pr_err("%s: Different registers found at dword %d, expected %x, found %x\n", + engine->name, dw, hw[dw], lrc[dw]); + err = -EINVAL; + break; + } + + /* + * Skip over the actual register value as we + * expect that to differ. + */ + dw += 2; + lri -= 2; + } + } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); + + if (err) { + pr_info("%s: HW register image:\n", engine->name); + hexdump(hw, PAGE_SIZE); + + pr_info("%s: SW register image:\n", engine->name); + hexdump(lrc, PAGE_SIZE); + } + + i915_gem_object_unpin_map(engine->default_state); + if (err) + break; + } + + kfree(mem); + return err; +} + +int intel_lrc_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_lrc_layout), + }; + + if (!HAS_LOGICAL_RING_CONTEXTS(i915)) + return 0; + + return intel_gt_live_subtests(tests, &i915->gt); +} diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index c1b7642337615..524f6710b7aa9 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1673,10 +1673,8 @@ static u32 oa_config_flex_reg(const struct i915_oa_config *oa_config, * in the case that the OA unit has been disabled. */ static void -gen8_update_reg_state_unlocked(struct i915_perf_stream *stream, - struct intel_context *ce, - u32 *reg_state, - const struct i915_oa_config *oa_config) +gen8_update_reg_state_unlocked(const struct intel_context *ce, + const struct i915_perf_stream *stream) { struct drm_i915_private *i915 = ce->engine->i915; u32 ctx_oactxctrl = i915->perf.ctx_oactxctrl_offset; @@ -1691,21 +1689,19 @@ gen8_update_reg_state_unlocked(struct i915_perf_stream *stream, EU_PERF_CNTL5, EU_PERF_CNTL6, }; + u32 *reg_state = ce->lrc_reg_state; int i; - CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL, + reg_state[ctx_oactxctrl + 1] = (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | - GEN8_OA_COUNTER_RESUME); + GEN8_OA_COUNTER_RESUME; - for (i = 0; i < ARRAY_SIZE(flex_regs); i++) { - CTX_REG(reg_state, ctx_flexeu0 + i * 2, flex_regs[i], - oa_config_flex_reg(oa_config, flex_regs[i])); - } + for (i = 0; i < ARRAY_SIZE(flex_regs); i++) + reg_state[ctx_flexeu0 + i * 2 + 1] = + oa_config_flex_reg(stream->oa_config, flex_regs[i]); - CTX_REG(reg_state, - CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, - intel_sseu_make_rpcs(i915, &ce->sseu)); + reg_state[CTX_R_PWR_CLK_STATE] = intel_sseu_make_rpcs(i915, &ce->sseu); } struct flex { @@ -1729,7 +1725,7 @@ gen8_store_flex(struct i915_request *rq, offset = i915_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE; do { *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *cs++ = offset + (flex->offset + 1) * sizeof(u32); + *cs++ = offset + flex->offset * sizeof(u32); *cs++ = 0; *cs++ = flex->value; } while (flex++, --count); @@ -1863,7 +1859,7 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream, struct drm_i915_private *i915 = stream->dev_priv; /* The MMIO offsets for Flex EU registers aren't contiguous */ const u32 ctx_flexeu0 = i915->perf.ctx_flexeu0_offset; -#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N)) +#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1) struct flex regs[] = { { GEN8_R_PWR_CLK_STATE, @@ -1871,7 +1867,7 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream, }, { GEN8_OACTXCONTROL, - i915->perf.ctx_oactxctrl_offset, + i915->perf.ctx_oactxctrl_offset + 1, ((stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | GEN8_OA_COUNTER_RESUME) @@ -2299,9 +2295,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, return ret; } -void i915_oa_init_reg_state(struct intel_engine_cs *engine, - struct intel_context *ce, - u32 *regs) +void i915_oa_init_reg_state(const struct intel_context *ce, + const struct intel_engine_cs *engine) { struct i915_perf_stream *stream; @@ -2313,7 +2308,7 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine, stream = engine->i915->perf.exclusive_stream; if (stream) - gen8_update_reg_state_unlocked(stream, ce, regs, stream->oa_config); + gen8_update_reg_state_unlocked(ce, stream); } /** diff --git a/drivers/gpu/drm/i915/i915_perf.h b/drivers/gpu/drm/i915/i915_perf.h index a412b16d9ffcf..f4fb311184b1d 100644 --- a/drivers/gpu/drm/i915/i915_perf.h +++ b/drivers/gpu/drm/i915/i915_perf.h @@ -25,8 +25,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, struct drm_file *file); -void i915_oa_init_reg_state(struct intel_engine_cs *engine, - struct intel_context *ce, - u32 *reg_state); +void i915_oa_init_reg_state(const struct intel_context *ce, + const struct intel_engine_cs *engine); #endif /* __I915_PERF_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index 1ccf0f731ac05..66d83c1390c16 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -15,6 +15,7 @@ selftest(workarounds, intel_workarounds_live_selftests) selftest(gt_engines, intel_engine_live_selftests) selftest(gt_timelines, intel_timeline_live_selftests) selftest(gt_contexts, intel_context_live_selftests) +selftest(gt_lrc, intel_lrc_live_selftests) selftest(requests, i915_request_live_selftests) selftest(active, i915_active_live_selftests) selftest(objects, i915_gem_object_live_selftests) From 6ea3cee6d77d2c5513d5a61162b209f671fb3bb8 Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Tue, 24 Sep 2019 13:01:52 +0530 Subject: [PATCH 244/331] drm/i915: Add Pipe D cursor ctrl register for Gen12 Currently the offset for PIPE D cursor control register is missing in i915_reg.h due to which the cursor plane cannot be enabled for Pipe D. This also causes kernel Warning, when a user requests to enable cursor plane for PIPE D for Gen 12 platforms. This patch adds the CURSOR_CTL_D register in the i915_reg.h. v2: Rebase Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111640 Signed-off-by: Ankit Nautiyal Reviewed-by: Lucas De Marchi [Lucas: remove extra blank line] Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/1569310312-12313-1-git-send-email-ankit.k.nautiyal@intel.com --- drivers/gpu/drm/i915/i915_pci.c | 9 +++++++++ drivers/gpu/drm/i915/i915_reg.h | 1 + 2 files changed, 10 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index c2faa679658c0..43530b0abc964 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -118,6 +118,14 @@ [PIPE_C] = IVB_CURSOR_C_OFFSET, \ } +#define TGL_CURSOR_OFFSETS \ + .cursor_offsets = { \ + [PIPE_A] = CURSOR_A_OFFSET, \ + [PIPE_B] = IVB_CURSOR_B_OFFSET, \ + [PIPE_C] = IVB_CURSOR_C_OFFSET, \ + [PIPE_D] = TGL_CURSOR_D_OFFSET, \ + } + #define I9XX_COLORS \ .color = { .gamma_lut_size = 256 } #define I965_COLORS \ @@ -787,6 +795,7 @@ static const struct intel_device_info intel_elkhartlake_info = { [TRANSCODER_DSI_0] = TRANSCODER_DSI0_OFFSET, \ [TRANSCODER_DSI_1] = TRANSCODER_DSI1_OFFSET, \ }, \ + TGL_CURSOR_OFFSETS, \ .has_global_mocs = 1, \ .display.has_dsb = 1 diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index a69c19aae5bb5..28c483a3bbba2 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -6240,6 +6240,7 @@ enum { #define CHV_CURSOR_C_OFFSET 0x700e0 #define IVB_CURSOR_B_OFFSET 0x71080 #define IVB_CURSOR_C_OFFSET 0x72080 +#define TGL_CURSOR_D_OFFSET 0x73080 /* Display A control */ #define _DSPACNTR 0x70180 From b1da91c9ddcef7ee077ea04797db7fa3a952d683 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 24 Sep 2019 18:35:01 +0100 Subject: [PATCH 245/331] drm/i915/tgl: Swap engines for no rps (gpu reclocking) If we disable rps, it appears the Tigerlake is stable enough to run multiple engines simultaneously in CI. As disabling rps should only cause the execution to be slow, whereas many features depend on the different engines, we would prefer to have the engines enabled while the machine hangs are being debugged. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111714 Signed-off-by: Chris Wilson Cc: Mika Kuoppala Acked-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190924173501.21956-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 43530b0abc964..ea53dfe2fba04 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -807,7 +807,7 @@ static const struct intel_device_info intel_tigerlake_12_info = { .display.has_modular_fia = 1, .engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2), - .engine_mask = BIT(RCS0), /* XXX reduced for debugging */ + .has_rps = false, /* XXX disabled for debugging */ }; #undef GEN From 1b8588741fdcb5976a343f7e5f47c5c97593d6d5 Mon Sep 17 00:00:00 2001 From: Swati Sharma Date: Tue, 24 Sep 2019 19:28:20 +0530 Subject: [PATCH 246/331] Revert "drm/i915/color: Extract icl_read_luts()" This reverts commit 84af7649188194a74cdd6437235a5e3c86108f0f. This is causing problems with the display, displays are all bright colors. Fixes: 84af76491881 ("drm/i915/color: Extract icl_read_luts()") Signed-off-by: Swati Sharma Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190924135820.11850-1-swati2.sharma@intel.com --- drivers/gpu/drm/i915/display/intel_color.c | 126 +++------------------ drivers/gpu/drm/i915/i915_reg.h | 6 - 2 files changed, 15 insertions(+), 117 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index 402151128e1fd..9ab34902663e2 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -1420,9 +1420,6 @@ static int icl_color_check(struct intel_crtc_state *crtc_state) static int i9xx_gamma_precision(const struct intel_crtc_state *crtc_state) { - if (!crtc_state->gamma_enable) - return 0; - switch (crtc_state->gamma_mode) { case GAMMA_MODE_MODE_8BIT: return 8; @@ -1436,9 +1433,6 @@ static int i9xx_gamma_precision(const struct intel_crtc_state *crtc_state) static int ilk_gamma_precision(const struct intel_crtc_state *crtc_state) { - if (!crtc_state->gamma_enable) - return 0; - if ((crtc_state->csc_mode & CSC_POSITION_BEFORE_GAMMA) == 0) return 0; @@ -1455,9 +1449,6 @@ static int ilk_gamma_precision(const struct intel_crtc_state *crtc_state) static int chv_gamma_precision(const struct intel_crtc_state *crtc_state) { - if (!crtc_state->gamma_enable) - return 0; - if (crtc_state->cgm_mode & CGM_PIPE_MODE_GAMMA) return 10; else @@ -1466,9 +1457,6 @@ static int chv_gamma_precision(const struct intel_crtc_state *crtc_state) static int glk_gamma_precision(const struct intel_crtc_state *crtc_state) { - if (!crtc_state->gamma_enable) - return 0; - switch (crtc_state->gamma_mode) { case GAMMA_MODE_MODE_8BIT: return 8; @@ -1480,39 +1468,21 @@ static int glk_gamma_precision(const struct intel_crtc_state *crtc_state) } } -static int icl_gamma_precision(const struct intel_crtc_state *crtc_state) -{ - if ((crtc_state->gamma_mode & POST_CSC_GAMMA_ENABLE) == 0) - return 0; - - switch (crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) { - case GAMMA_MODE_MODE_8BIT: - return 8; - case GAMMA_MODE_MODE_10BIT: - return 10; - case GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED: - return 16; - default: - MISSING_CASE(crtc_state->gamma_mode); - return 0; - } - -} - int intel_color_get_gamma_bit_precision(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + if (!crtc_state->gamma_enable) + return 0; + if (HAS_GMCH(dev_priv)) { if (IS_CHERRYVIEW(dev_priv)) return chv_gamma_precision(crtc_state); else return i9xx_gamma_precision(crtc_state); } else { - if (INTEL_GEN(dev_priv) >= 11) - return icl_gamma_precision(crtc_state); - else if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) + if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) return glk_gamma_precision(crtc_state); else if (IS_IRONLAKE(dev_priv)) return ilk_gamma_precision(crtc_state); @@ -1543,20 +1513,6 @@ static bool intel_color_lut_entry_equal(struct drm_color_lut *lut1, return true; } -static bool intel_color_lut_entry_multi_equal(struct drm_color_lut *lut1, - struct drm_color_lut *lut2, - int lut_size, u32 err) -{ - int i; - - for (i = 0; i < 9; i++) { - if (!err_check(&lut1[i], &lut2[i], err)) - return false; - } - - return true; -} - bool intel_color_lut_equal(struct drm_property_blob *blob1, struct drm_property_blob *blob2, u32 gamma_mode, u32 bit_precision) @@ -1575,8 +1531,16 @@ bool intel_color_lut_equal(struct drm_property_blob *blob1, lut_size2 = drm_color_lut_size(blob2); /* check sw and hw lut size */ - if (lut_size1 != lut_size2) - return false; + switch (gamma_mode) { + case GAMMA_MODE_MODE_8BIT: + case GAMMA_MODE_MODE_10BIT: + if (lut_size1 != lut_size2) + return false; + break; + default: + MISSING_CASE(gamma_mode); + return false; + } lut1 = blob1->data; lut2 = blob2->data; @@ -1584,18 +1548,13 @@ bool intel_color_lut_equal(struct drm_property_blob *blob1, err = 0xffff >> bit_precision; /* check sw and hw lut entry to be equal */ - switch (gamma_mode & GAMMA_MODE_MODE_MASK) { + switch (gamma_mode) { case GAMMA_MODE_MODE_8BIT: case GAMMA_MODE_MODE_10BIT: if (!intel_color_lut_entry_equal(lut1, lut2, lut_size2, err)) return false; break; - case GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED: - if (!intel_color_lut_entry_multi_equal(lut1, lut2, - lut_size2, err)) - return false; - break; default: MISSING_CASE(gamma_mode); return false; @@ -1835,60 +1794,6 @@ static void glk_read_luts(struct intel_crtc_state *crtc_state) crtc_state->base.gamma_lut = glk_read_lut_10(crtc_state, PAL_PREC_INDEX_VALUE(0)); } -static struct drm_property_blob * -icl_read_lut_multi_segment(const struct intel_crtc_state *crtc_state) -{ - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - int lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; - enum pipe pipe = crtc->pipe; - struct drm_property_blob *blob; - struct drm_color_lut *blob_data; - u32 i, val1, val2; - - blob = drm_property_create_blob(&dev_priv->drm, - sizeof(struct drm_color_lut) * lut_size, - NULL); - if (IS_ERR(blob)) - return NULL; - - blob_data = blob->data; - - I915_WRITE(PREC_PAL_MULTI_SEG_INDEX(pipe), PAL_PREC_AUTO_INCREMENT); - - for (i = 0; i < 9; i++) { - val1 = I915_READ(PREC_PAL_MULTI_SEG_DATA(pipe)); - val2 = I915_READ(PREC_PAL_MULTI_SEG_DATA(pipe)); - - blob_data[i].red = REG_FIELD_GET(PAL_PREC_MULTI_SEG_RED_UDW_MASK, val2) << 6 | - REG_FIELD_GET(PAL_PREC_MULTI_SEG_RED_LDW_MASK, val1); - blob_data[i].green = REG_FIELD_GET(PAL_PREC_MULTI_SEG_GREEN_UDW_MASK, val2) << 6 | - REG_FIELD_GET(PAL_PREC_MULTI_SEG_GREEN_LDW_MASK, val1); - blob_data[i].blue = REG_FIELD_GET(PAL_PREC_MULTI_SEG_BLUE_UDW_MASK, val2) << 6 | - REG_FIELD_GET(PAL_PREC_MULTI_SEG_BLUE_LDW_MASK, val1); - } - - /* - * FIXME readouts from PAL_PREC_DATA register aren't giving correct values - * in the case of fine and coarse segments. Restricting readouts only for - * super fine segment as of now. - */ - - return blob; -} - -static void icl_read_luts(struct intel_crtc_state *crtc_state) -{ - if ((crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) == - GAMMA_MODE_MODE_8BIT) - crtc_state->base.gamma_lut = i9xx_read_lut_8(crtc_state); - else if ((crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) == - GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED) - crtc_state->base.gamma_lut = icl_read_lut_multi_segment(crtc_state); - else - crtc_state->base.gamma_lut = glk_read_lut_10(crtc_state, PAL_PREC_INDEX_VALUE(0)); -} - void intel_color_init(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); @@ -1932,7 +1837,6 @@ void intel_color_init(struct intel_crtc *crtc) if (INTEL_GEN(dev_priv) >= 11) { dev_priv->display.load_luts = icl_load_luts; - dev_priv->display.read_luts = icl_read_luts; } else if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) { dev_priv->display.load_luts = glk_load_luts; dev_priv->display.read_luts = glk_read_luts; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 28c483a3bbba2..e752de9470bd0 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -10575,12 +10575,6 @@ enum skl_power_gate { #define _PAL_PREC_MULTI_SEG_DATA_A 0x4A40C #define _PAL_PREC_MULTI_SEG_DATA_B 0x4AC0C -#define PAL_PREC_MULTI_SEG_RED_LDW_MASK REG_GENMASK(29, 24) -#define PAL_PREC_MULTI_SEG_RED_UDW_MASK REG_GENMASK(29, 20) -#define PAL_PREC_MULTI_SEG_GREEN_LDW_MASK REG_GENMASK(19, 14) -#define PAL_PREC_MULTI_SEG_GREEN_UDW_MASK REG_GENMASK(19, 10) -#define PAL_PREC_MULTI_SEG_BLUE_LDW_MASK REG_GENMASK(9, 4) -#define PAL_PREC_MULTI_SEG_BLUE_UDW_MASK REG_GENMASK(9, 0) #define PREC_PAL_MULTI_SEG_INDEX(pipe) _MMIO_PIPE(pipe, \ _PAL_PREC_MULTI_SEG_INDEX_A, \ From ed06efb801bd291e935238d3fba46fa03d098f0e Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 25 Sep 2019 10:21:09 +0200 Subject: [PATCH 247/331] drm/i915/dp: Fix dsc bpp calculations, v5. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There was a integer wraparound when mode_clock became too high, and we didn't correct for the FEC overhead factor when dividing, with the calculations breaking at HBR3. As a result our calculated bpp was way too high, and the link width limitation never came into effect. Print out the resulting bpp calcululations as a sanity check, just in case we ever have to debug it later on again. We also used the wrong factor for FEC. While bspec mentions 2.4%, all the calculations use 1/0.972261, and the same ratio should be applied to data M/N as well, so use it there when FEC is enabled. This fixes the FIFO underrun we are seeing with FEC enabled. Changes since v2: - Handle fec_enable in intel_link_compute_m_n, so only data M/N is adjusted. (Ville) - Fix initial hardware readout for FEC. (Ville) Changes since v3: - Remove bogus fec_to_mode_clock. (Ville) Changes since v4: - Use the correct register for icl. (Ville) - Split hw readout to a separate patch. Signed-off-by: Maarten Lankhorst Fixes: d9218c8f6cf4 ("drm/i915/dp: Add helpers for Compressed BPP and Slice Count for DSC") Cc: # v5.0+ Cc: Manasi Navare Link: https://patchwork.freedesktop.org/patch/msgid/20190925082110.17439-1-maarten.lankhorst@linux.intel.com Reviewed-by: Ville Syrjälä --- drivers/gpu/drm/i915/display/intel_display.c | 12 +- drivers/gpu/drm/i915/display/intel_display.h | 2 +- drivers/gpu/drm/i915/display/intel_dp.c | 184 ++++++++++--------- drivers/gpu/drm/i915/display/intel_dp.h | 6 +- drivers/gpu/drm/i915/display/intel_dp_mst.c | 2 +- 5 files changed, 107 insertions(+), 99 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 5ecf542701811..c4c9286be9879 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -7291,7 +7291,7 @@ static int ironlake_fdi_compute_config(struct intel_crtc *intel_crtc, pipe_config->fdi_lanes = lane; intel_link_compute_m_n(pipe_config->pipe_bpp, lane, fdi_dotclock, - link_bw, &pipe_config->fdi_m_n, false); + link_bw, &pipe_config->fdi_m_n, false, false); ret = ironlake_check_fdi_lanes(dev, intel_crtc->pipe, pipe_config); if (ret == -EDEADLK) @@ -7538,11 +7538,15 @@ void intel_link_compute_m_n(u16 bits_per_pixel, int nlanes, int pixel_clock, int link_clock, struct intel_link_m_n *m_n, - bool constant_n) + bool constant_n, bool fec_enable) { - m_n->tu = 64; + u32 data_clock = bits_per_pixel * pixel_clock; + + if (fec_enable) + data_clock = intel_dp_mode_to_fec_clock(data_clock); - compute_m_n(bits_per_pixel * pixel_clock, + m_n->tu = 64; + compute_m_n(data_clock, link_clock * nlanes * 8, &m_n->gmch_m, &m_n->gmch_n, constant_n); diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index 5cea6f8e107ae..4b9e18e5a263d 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -443,7 +443,7 @@ enum phy_fia { void intel_link_compute_m_n(u16 bpp, int nlanes, int pixel_clock, int link_clock, struct intel_link_m_n *m_n, - bool constant_n); + bool constant_n, bool fec_enable); bool is_ccs_modifier(u64 modifier); void lpt_disable_clkout_dp(struct drm_i915_private *dev_priv); u32 intel_plane_fb_max_stride(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 5cce9459e93c0..893c9807755cc 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -76,8 +76,8 @@ #define DP_DSC_MAX_ENC_THROUGHPUT_0 340000 #define DP_DSC_MAX_ENC_THROUGHPUT_1 400000 -/* DP DSC FEC Overhead factor = (100 - 2.4)/100 */ -#define DP_DSC_FEC_OVERHEAD_FACTOR 976 +/* DP DSC FEC Overhead factor = 1/(0.972261) */ +#define DP_DSC_FEC_OVERHEAD_FACTOR 972261 /* Compliance test status bits */ #define INTEL_DP_RESOLUTION_SHIFT_MASK 0 @@ -492,6 +492,97 @@ int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp, return 0; } +u32 intel_dp_mode_to_fec_clock(u32 mode_clock) +{ + return div_u64(mul_u32_u32(mode_clock, 1000000U), + DP_DSC_FEC_OVERHEAD_FACTOR); +} + +static u16 intel_dp_dsc_get_output_bpp(u32 link_clock, u32 lane_count, + u32 mode_clock, u32 mode_hdisplay) +{ + u32 bits_per_pixel, max_bpp_small_joiner_ram; + int i; + + /* + * Available Link Bandwidth(Kbits/sec) = (NumberOfLanes)* + * (LinkSymbolClock)* 8 * (TimeSlotsPerMTP) + * for SST -> TimeSlotsPerMTP is 1, + * for MST -> TimeSlotsPerMTP has to be calculated + */ + bits_per_pixel = (link_clock * lane_count * 8) / + intel_dp_mode_to_fec_clock(mode_clock); + DRM_DEBUG_KMS("Max link bpp: %u\n", bits_per_pixel); + + /* Small Joiner Check: output bpp <= joiner RAM (bits) / Horiz. width */ + max_bpp_small_joiner_ram = DP_DSC_MAX_SMALL_JOINER_RAM_BUFFER / mode_hdisplay; + DRM_DEBUG_KMS("Max small joiner bpp: %u\n", max_bpp_small_joiner_ram); + + /* + * Greatest allowed DSC BPP = MIN (output BPP from available Link BW + * check, output bpp from small joiner RAM check) + */ + bits_per_pixel = min(bits_per_pixel, max_bpp_small_joiner_ram); + + /* Error out if the max bpp is less than smallest allowed valid bpp */ + if (bits_per_pixel < valid_dsc_bpp[0]) { + DRM_DEBUG_KMS("Unsupported BPP %u, min %u\n", + bits_per_pixel, valid_dsc_bpp[0]); + return 0; + } + + /* Find the nearest match in the array of known BPPs from VESA */ + for (i = 0; i < ARRAY_SIZE(valid_dsc_bpp) - 1; i++) { + if (bits_per_pixel < valid_dsc_bpp[i + 1]) + break; + } + bits_per_pixel = valid_dsc_bpp[i]; + + /* + * Compressed BPP in U6.4 format so multiply by 16, for Gen 11, + * fractional part is 0 + */ + return bits_per_pixel << 4; +} + +static u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp, + int mode_clock, int mode_hdisplay) +{ + u8 min_slice_count, i; + int max_slice_width; + + if (mode_clock <= DP_DSC_PEAK_PIXEL_RATE) + min_slice_count = DIV_ROUND_UP(mode_clock, + DP_DSC_MAX_ENC_THROUGHPUT_0); + else + min_slice_count = DIV_ROUND_UP(mode_clock, + DP_DSC_MAX_ENC_THROUGHPUT_1); + + max_slice_width = drm_dp_dsc_sink_max_slice_width(intel_dp->dsc_dpcd); + if (max_slice_width < DP_DSC_MIN_SLICE_WIDTH_VALUE) { + DRM_DEBUG_KMS("Unsupported slice width %d by DP DSC Sink device\n", + max_slice_width); + return 0; + } + /* Also take into account max slice width */ + min_slice_count = min_t(u8, min_slice_count, + DIV_ROUND_UP(mode_hdisplay, + max_slice_width)); + + /* Find the closest match to the valid slice count values */ + for (i = 0; i < ARRAY_SIZE(valid_dsc_slicecount); i++) { + if (valid_dsc_slicecount[i] > + drm_dp_dsc_sink_max_slice_count(intel_dp->dsc_dpcd, + false)) + break; + if (min_slice_count <= valid_dsc_slicecount[i]) + return valid_dsc_slicecount[i]; + } + + DRM_DEBUG_KMS("Unsupported Slice Count %d\n", min_slice_count); + return 0; +} + static enum drm_mode_status intel_dp_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) @@ -2259,7 +2350,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, adjusted_mode->crtc_clock, pipe_config->port_clock, &pipe_config->dp_m_n, - constant_n); + constant_n, pipe_config->fec_enable); if (intel_connector->panel.downclock_mode != NULL && dev_priv->drrs.type == SEAMLESS_DRRS_SUPPORT) { @@ -2269,7 +2360,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, intel_connector->panel.downclock_mode->clock, pipe_config->port_clock, &pipe_config->dp_m2_n2, - constant_n); + constant_n, pipe_config->fec_enable); } if (!HAS_DDI(dev_priv)) @@ -4373,91 +4464,6 @@ intel_dp_get_sink_irq_esi(struct intel_dp *intel_dp, u8 *sink_irq_vector) DP_DPRX_ESI_LEN; } -u16 intel_dp_dsc_get_output_bpp(int link_clock, u8 lane_count, - int mode_clock, int mode_hdisplay) -{ - u16 bits_per_pixel, max_bpp_small_joiner_ram; - int i; - - /* - * Available Link Bandwidth(Kbits/sec) = (NumberOfLanes)* - * (LinkSymbolClock)* 8 * ((100-FECOverhead)/100)*(TimeSlotsPerMTP) - * FECOverhead = 2.4%, for SST -> TimeSlotsPerMTP is 1, - * for MST -> TimeSlotsPerMTP has to be calculated - */ - bits_per_pixel = (link_clock * lane_count * 8 * - DP_DSC_FEC_OVERHEAD_FACTOR) / - mode_clock; - - /* Small Joiner Check: output bpp <= joiner RAM (bits) / Horiz. width */ - max_bpp_small_joiner_ram = DP_DSC_MAX_SMALL_JOINER_RAM_BUFFER / - mode_hdisplay; - - /* - * Greatest allowed DSC BPP = MIN (output BPP from avaialble Link BW - * check, output bpp from small joiner RAM check) - */ - bits_per_pixel = min(bits_per_pixel, max_bpp_small_joiner_ram); - - /* Error out if the max bpp is less than smallest allowed valid bpp */ - if (bits_per_pixel < valid_dsc_bpp[0]) { - DRM_DEBUG_KMS("Unsupported BPP %d\n", bits_per_pixel); - return 0; - } - - /* Find the nearest match in the array of known BPPs from VESA */ - for (i = 0; i < ARRAY_SIZE(valid_dsc_bpp) - 1; i++) { - if (bits_per_pixel < valid_dsc_bpp[i + 1]) - break; - } - bits_per_pixel = valid_dsc_bpp[i]; - - /* - * Compressed BPP in U6.4 format so multiply by 16, for Gen 11, - * fractional part is 0 - */ - return bits_per_pixel << 4; -} - -u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp, - int mode_clock, - int mode_hdisplay) -{ - u8 min_slice_count, i; - int max_slice_width; - - if (mode_clock <= DP_DSC_PEAK_PIXEL_RATE) - min_slice_count = DIV_ROUND_UP(mode_clock, - DP_DSC_MAX_ENC_THROUGHPUT_0); - else - min_slice_count = DIV_ROUND_UP(mode_clock, - DP_DSC_MAX_ENC_THROUGHPUT_1); - - max_slice_width = drm_dp_dsc_sink_max_slice_width(intel_dp->dsc_dpcd); - if (max_slice_width < DP_DSC_MIN_SLICE_WIDTH_VALUE) { - DRM_DEBUG_KMS("Unsupported slice width %d by DP DSC Sink device\n", - max_slice_width); - return 0; - } - /* Also take into account max slice width */ - min_slice_count = min_t(u8, min_slice_count, - DIV_ROUND_UP(mode_hdisplay, - max_slice_width)); - - /* Find the closest match to the valid slice count values */ - for (i = 0; i < ARRAY_SIZE(valid_dsc_slicecount); i++) { - if (valid_dsc_slicecount[i] > - drm_dp_dsc_sink_max_slice_count(intel_dp->dsc_dpcd, - false)) - break; - if (min_slice_count <= valid_dsc_slicecount[i]) - return valid_dsc_slicecount[i]; - } - - DRM_DEBUG_KMS("Unsupported Slice Count %d\n", min_slice_count); - return 0; -} - static void intel_pixel_encoding_setup_vsc(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h index e01d1f89409da..a194b5b6da059 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.h +++ b/drivers/gpu/drm/i915/display/intel_dp.h @@ -103,10 +103,6 @@ bool intel_dp_source_supports_hbr2(struct intel_dp *intel_dp); bool intel_dp_source_supports_hbr3(struct intel_dp *intel_dp); bool intel_dp_get_link_status(struct intel_dp *intel_dp, u8 *link_status); -u16 intel_dp_dsc_get_output_bpp(int link_clock, u8 lane_count, - int mode_clock, int mode_hdisplay); -u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp, int mode_clock, - int mode_hdisplay); bool intel_dp_read_dpcd(struct intel_dp *intel_dp); bool intel_dp_get_colorimetry_status(struct intel_dp *intel_dp); @@ -119,4 +115,6 @@ static inline unsigned int intel_dp_unused_lane_mask(int lane_count) return ~((1 << lane_count) - 1) & 0xf; } +u32 intel_dp_mode_to_fec_clock(u32 mode_clock); + #endif /* __INTEL_DP_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index eeeb3f933aa4c..cf4d851a51395 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -81,7 +81,7 @@ static int intel_dp_mst_compute_link_config(struct intel_encoder *encoder, adjusted_mode->crtc_clock, crtc_state->port_clock, &crtc_state->dp_m_n, - constant_n); + constant_n, crtc_state->fec_enable); crtc_state->dp_m_n.tu = slots; return 0; From 8aa940c8551cce80bafcd8e1cd93d7b39a1acd8a Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 25 Sep 2019 10:21:10 +0200 Subject: [PATCH 248/331] drm/i915: Add hardware readout for FEC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Readout the FEC state in encoder->get_config(), this will allow us to ensure that we can correctly inherit the state from boot, and that we set FEC during modeset. Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20190925082110.17439-2-maarten.lankhorst@linux.intel.com Reviewed-by: Ville Syrjälä --- drivers/gpu/drm/i915/display/intel_ddi.c | 17 +++++++++++++++++ drivers/gpu/drm/i915/display/intel_display.c | 1 + 2 files changed, 18 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 33cd766f9eeac..14fe987888a67 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -4021,6 +4021,23 @@ void intel_ddi_get_config(struct intel_encoder *encoder, pipe_config->lane_count = ((temp & DDI_PORT_WIDTH_MASK) >> DDI_PORT_WIDTH_SHIFT) + 1; intel_dp_get_m_n(intel_crtc, pipe_config); + + if (INTEL_GEN(dev_priv) >= 11) { + i915_reg_t dp_tp_ctl; + + if (IS_GEN(dev_priv, 11)) + dp_tp_ctl = DP_TP_CTL(encoder->port); + else + dp_tp_ctl = TGL_DP_TP_CTL(pipe_config->cpu_transcoder); + + pipe_config->fec_enable = + I915_READ(dp_tp_ctl) & DP_TP_CTL_FEC_ENABLE; + + DRM_DEBUG_KMS("[ENCODER:%d:%s] Fec status: %u\n", + encoder->base.base.id, encoder->base.name, + pipe_config->fec_enable); + } + break; case TRANS_DDI_MODE_SELECT_DP_MST: pipe_config->output_types |= BIT(INTEL_OUTPUT_DP_MST); diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index c4c9286be9879..31698a57773fd 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -12836,6 +12836,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, PIPE_CONF_CHECK_BOOL(hdmi_scrambling); PIPE_CONF_CHECK_BOOL(hdmi_high_tmds_clock_ratio); PIPE_CONF_CHECK_BOOL(has_infoframe); + PIPE_CONF_CHECK_BOOL(fec_enable); PIPE_CONF_CHECK_BOOL_INCOMPLETE(has_audio); From c22d62e6e49beadea0024fe4eb9a02ce0f635657 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Fri, 20 Sep 2019 13:42:18 +0200 Subject: [PATCH 249/331] drm/i915: Get rid of crtc_state->fb_changed We had this as an optimization to not do a plane update, but we killed it off because there are so many reasons we may have to do a plane update or fastset that it's best to just assume everything changed. Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20190920114235.22411-6-maarten.lankhorst@linux.intel.com Reviewed-by: Matt Roper --- drivers/gpu/drm/i915/display/intel_atomic.c | 1 - drivers/gpu/drm/i915/display/intel_display.c | 10 +--------- drivers/gpu/drm/i915/display/intel_display_types.h | 1 - 3 files changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c index 698802da07b73..f7a981ff68129 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic.c +++ b/drivers/gpu/drm/i915/display/intel_atomic.c @@ -199,7 +199,6 @@ intel_crtc_duplicate_state(struct drm_crtc *crtc) crtc_state->disable_cxsr = false; crtc_state->update_wm_pre = false; crtc_state->update_wm_post = false; - crtc_state->fb_changed = false; crtc_state->fifo_changed = false; crtc_state->wm.need_postvbl_update = false; crtc_state->fb_bits = 0; diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 31698a57773fd..292be4b71a821 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -11582,7 +11582,6 @@ int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_stat bool was_crtc_enabled = old_crtc_state->base.active; bool is_crtc_enabled = crtc_state->base.active; bool turn_off, turn_on, visible, was_visible; - struct drm_framebuffer *fb = plane_state->base.fb; int ret; if (INTEL_GEN(dev_priv) >= 9 && plane->id != PLANE_CURSOR) { @@ -11616,18 +11615,11 @@ int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_stat if (!was_visible && !visible) return 0; - if (fb != old_plane_state->base.fb) - crtc_state->fb_changed = true; - turn_off = was_visible && (!visible || mode_changed); turn_on = visible && (!was_visible || mode_changed); - DRM_DEBUG_ATOMIC("[CRTC:%d:%s] has [PLANE:%d:%s] with fb %i\n", + DRM_DEBUG_ATOMIC("[CRTC:%d:%s] with [PLANE:%d:%s] visible %i -> %i, off %i, on %i, ms %i\n", crtc->base.base.id, crtc->base.name, - plane->base.base.id, plane->base.name, - fb ? fb->base.id : -1); - - DRM_DEBUG_ATOMIC("[PLANE:%d:%s] visible %i -> %i, off %i, on %i, ms %i\n", plane->base.base.id, plane->base.name, was_visible, visible, turn_off, turn_on, mode_changed); diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 6b0a646f0170a..a33644f12af13 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -766,7 +766,6 @@ struct intel_crtc_state { bool update_pipe; /* can a fast modeset be performed? */ bool disable_cxsr; bool update_wm_pre, update_wm_post; /* watermarks are updated */ - bool fb_changed; /* fb on any of the planes is changed */ bool fifo_changed; /* FIFO split is changed */ /* Pipe source size (ie. panel fitter input size) From c47b7ddbcb29fca51515294a6fd2ed1d7d861939 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Fri, 20 Sep 2019 13:42:20 +0200 Subject: [PATCH 250/331] drm/i915: Rename planar linked plane variables Rename linked_plane to planar_linked_plane and slave to planar_slave, this will make it easier to keep apart bigjoiner linking and planar plane linking. Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20190920114235.22411-8-maarten.lankhorst@linux.intel.com Reviewed-by: Matt Roper --- drivers/gpu/drm/i915/display/intel_atomic.c | 7 ++++-- .../gpu/drm/i915/display/intel_atomic_plane.c | 4 ++-- drivers/gpu/drm/i915/display/intel_display.c | 22 +++++++++---------- .../drm/i915/display/intel_display_types.h | 8 +++---- drivers/gpu/drm/i915/display/intel_sprite.c | 4 ++-- drivers/gpu/drm/i915/intel_pm.c | 12 +++++----- 6 files changed, 30 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c index f7a981ff68129..99f4f83de2800 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic.c +++ b/drivers/gpu/drm/i915/display/intel_atomic.c @@ -263,10 +263,13 @@ static void intel_atomic_setup_scaler(struct intel_crtc_scaler_state *scaler_sta */ mode = PS_SCALER_MODE_NORMAL; } else { + struct intel_plane *linked = + plane_state->planar_linked_plane; + mode = PS_SCALER_MODE_PLANAR; - if (plane_state->linked_plane) - mode |= PS_PLANE_Y_SEL(plane_state->linked_plane->id); + if (linked) + mode |= PS_PLANE_Y_SEL(linked->id); } } else if (INTEL_GEN(dev_priv) > 9 || IS_GEMINILAKE(dev_priv)) { mode = PS_SCALER_MODE_NORMAL; diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.c b/drivers/gpu/drm/i915/display/intel_atomic_plane.c index 476ef0906ba04..98b7766eaa7af 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.c @@ -321,9 +321,9 @@ void skl_update_planes_on_crtc(struct intel_atomic_state *state, if (new_plane_state->base.visible) { intel_update_plane(plane, new_crtc_state, new_plane_state); - } else if (new_plane_state->slave) { + } else if (new_plane_state->planar_slave) { struct intel_plane *master = - new_plane_state->linked_plane; + new_plane_state->planar_linked_plane; /* * We update the slave plane from this function because diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 292be4b71a821..8f125f1624bd9 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -11729,7 +11729,7 @@ static int icl_add_linked_planes(struct intel_atomic_state *state) int i; for_each_new_intel_plane_in_state(state, plane, plane_state, i) { - linked = plane_state->linked_plane; + linked = plane_state->planar_linked_plane; if (!linked) continue; @@ -11738,8 +11738,8 @@ static int icl_add_linked_planes(struct intel_atomic_state *state) if (IS_ERR(linked_plane_state)) return PTR_ERR(linked_plane_state); - WARN_ON(linked_plane_state->linked_plane != plane); - WARN_ON(linked_plane_state->slave == plane_state->slave); + WARN_ON(linked_plane_state->planar_linked_plane != plane); + WARN_ON(linked_plane_state->planar_slave == plane_state->planar_slave); } return 0; @@ -11762,16 +11762,16 @@ static int icl_check_nv12_planes(struct intel_crtc_state *crtc_state) * in the crtc_state->active_planes mask. */ for_each_new_intel_plane_in_state(state, plane, plane_state, i) { - if (plane->pipe != crtc->pipe || !plane_state->linked_plane) + if (plane->pipe != crtc->pipe || !plane_state->planar_linked_plane) continue; - plane_state->linked_plane = NULL; - if (plane_state->slave && !plane_state->base.visible) { + plane_state->planar_linked_plane = NULL; + if (plane_state->planar_slave && !plane_state->base.visible) { crtc_state->active_planes &= ~BIT(plane->id); crtc_state->update_planes |= BIT(plane->id); } - plane_state->slave = false; + plane_state->planar_slave = false; } if (!crtc_state->nv12_planes) @@ -11805,10 +11805,10 @@ static int icl_check_nv12_planes(struct intel_crtc_state *crtc_state) return -EINVAL; } - plane_state->linked_plane = linked; + plane_state->planar_linked_plane = linked; - linked_state->slave = true; - linked_state->linked_plane = plane; + linked_state->planar_slave = true; + linked_state->planar_linked_plane = plane; crtc_state->active_planes |= BIT(linked->id); crtc_state->update_planes |= BIT(linked->id); DRM_DEBUG_KMS("Using %s as Y plane for %s\n", linked->base.name, plane->base.name); @@ -13257,7 +13257,7 @@ intel_verify_planes(struct intel_atomic_state *state) for_each_new_intel_plane_in_state(state, plane, plane_state, i) - assert_plane(plane, plane_state->slave || + assert_plane(plane, plane_state->planar_slave || plane_state->base.visible); } diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index a33644f12af13..976669f01a8c3 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -559,24 +559,24 @@ struct intel_plane_state { int scaler_id; /* - * linked_plane: + * planar_linked_plane: * * ICL planar formats require 2 planes that are updated as pairs. * This member is used to make sure the other plane is also updated * when required, and for update_slave() to find the correct * plane_state to pass as argument. */ - struct intel_plane *linked_plane; + struct intel_plane *planar_linked_plane; /* - * slave: + * planar_slave: * If set don't update use the linked plane's state for updating * this plane during atomic commit with the update_slave() callback. * * It's also used by the watermark code to ignore wm calculations on * this plane. They're calculated by the linked plane's wm code. */ - u32 slave; + u32 planar_slave; struct drm_intel_sprite_colorkey ckey; }; diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index 7a7078d0ba23d..3d56db32291b6 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -542,7 +542,7 @@ skl_program_plane(struct intel_plane *plane, u32 y = plane_state->color_plane[color_plane].y; u32 src_w = drm_rect_width(&plane_state->base.src) >> 16; u32 src_h = drm_rect_height(&plane_state->base.src) >> 16; - struct intel_plane *linked = plane_state->linked_plane; + struct intel_plane *linked = plane_state->planar_linked_plane; const struct drm_framebuffer *fb = plane_state->base.fb; u8 alpha = plane_state->base.alpha >> 8; u32 plane_color_ctl = 0; @@ -641,7 +641,7 @@ skl_update_plane(struct intel_plane *plane, { int color_plane = 0; - if (plane_state->linked_plane) { + if (plane_state->planar_linked_plane) { /* Program the UV plane */ color_plane = 1; } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 6aa40f5462266..6bed2ed145749 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4293,7 +4293,7 @@ icl_get_total_relative_data_rate(struct intel_crtc_state *crtc_state, enum plane_id plane_id = to_intel_plane(plane)->id; u64 rate; - if (!plane_state->linked_plane) { + if (!plane_state->planar_linked_plane) { rate = skl_plane_relative_data_rate(crtc_state, plane_state, 0); plane_data_rate[plane_id] = rate; total_data_rate += rate; @@ -4307,12 +4307,12 @@ icl_get_total_relative_data_rate(struct intel_crtc_state *crtc_state, * NULL if we try get_new_plane_state(), so we * always calculate from the master. */ - if (plane_state->slave) + if (plane_state->planar_slave) continue; /* Y plane rate is calculated on the slave */ rate = skl_plane_relative_data_rate(crtc_state, plane_state, 0); - y_plane_id = plane_state->linked_plane->id; + y_plane_id = plane_state->planar_linked_plane->id; plane_data_rate[y_plane_id] = rate; total_data_rate += rate; @@ -5051,12 +5051,12 @@ static int icl_build_plane_wm(struct intel_crtc_state *crtc_state, int ret; /* Watermarks calculated in master */ - if (plane_state->slave) + if (plane_state->planar_slave) return 0; - if (plane_state->linked_plane) { + if (plane_state->planar_linked_plane) { const struct drm_framebuffer *fb = plane_state->base.fb; - enum plane_id y_plane_id = plane_state->linked_plane->id; + enum plane_id y_plane_id = plane_state->planar_linked_plane->id; WARN_ON(!intel_wm_plane_visible(crtc_state, plane_state)); WARN_ON(!fb->format->is_yuv || From c750c22b2456bcd8c50067424ccabf2686306cda Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Fri, 20 Sep 2019 13:42:21 +0200 Subject: [PATCH 251/331] drm/i915: Do not add all planes when checking scalers on glk+ We cannot switch between HQ and normal mode on GLK+, so only add planes on platforms where it makes sense. We could probably restrict it even more to only add when scaler users toggles between 1 and 2, but lets just leave it for now. Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20190920114235.22411-9-maarten.lankhorst@linux.intel.com Reviewed-by: Matt Roper --- drivers/gpu/drm/i915/display/intel_atomic.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c index 99f4f83de2800..c5a552a697527 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic.c +++ b/drivers/gpu/drm/i915/display/intel_atomic.c @@ -373,6 +373,15 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, */ if (!plane) { struct drm_plane_state *state; + + /* + * GLK+ scalers don't have a HQ mode so it + * isn't necessary to change between HQ and dyn mode + * on those platforms. + */ + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + continue; + plane = drm_plane_from_index(&dev_priv->drm, i); state = drm_atomic_get_plane_state(drm_state, plane); if (IS_ERR(state)) { From f9d4eae25d93a76f8ed52b1519cc27c5b3cb1dcb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 25 Sep 2019 14:08:45 +0100 Subject: [PATCH 252/331] drm/i915/execlists: Simplify gen12_csb_parse Having decided that we only care about the promotion predicate, we can simplify gen12_csb_parse to simply check whether we need to jump to a new queue. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Daniele Ceraolo Spurio Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20190925130845.17952-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index caac091b3eb95..ab725a6ca0ac4 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1801,9 +1801,6 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw); bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE; - if (!ctx_away_valid && ctx_to_valid) - return true; - /* * The context switch detail is not guaranteed to be 5 when a preemption * occurs, so we can't just check for that. The check below works for @@ -1811,8 +1808,10 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) * instructions and lite-restore. Preempt-to-idle via the CTRL register * would require some extra handling, but we don't support that. */ - if (new_queue && ctx_away_valid) + if (!ctx_away_valid || new_queue) { + GEM_BUG_ON(!ctx_to_valid); return true; + } /* * switch detail = 5 is covered by the case above and we do not expect a @@ -1820,7 +1819,6 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) * use polling mode. */ GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw)); - return false; } From 1e225a2c7477f114f271403ce48b26dd7bc1da2c Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 24 Sep 2019 14:00:35 -0700 Subject: [PATCH 253/331] drm/i915/tgl: Add initial dkl pll support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The disable function can be the same as for MG phy since the same registers are used. The others are different as registers changed, also adding a empty dkl_pll_write() to be implemented later. v2: Setting the right HIP_INDEX_REG bits (José) v3: Masking non-computed registers of mg_pll_tdc_coldst_bias when getting hardware state Sharing mg_pll_enable() with TGL Reviewed-by: Imre Deak Acked-by: Lucas De Marchi Signed-off-by: José Roberto de Souza Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20190924210040.142075-1-jose.souza@intel.com --- drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 96 ++++++++++++++++++- 1 file changed, 94 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 84e734d448283..1d56027be12c2 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -3086,6 +3086,78 @@ static bool mg_pll_get_hw_state(struct drm_i915_private *dev_priv, return ret; } +static bool dkl_pll_get_hw_state(struct drm_i915_private *dev_priv, + struct intel_shared_dpll *pll, + struct intel_dpll_hw_state *hw_state) +{ + const enum intel_dpll_id id = pll->info->id; + enum tc_port tc_port = icl_pll_id_to_tc_port(id); + intel_wakeref_t wakeref; + bool ret = false; + u32 val; + + wakeref = intel_display_power_get_if_enabled(dev_priv, + POWER_DOMAIN_DISPLAY_CORE); + if (!wakeref) + return false; + + val = I915_READ(MG_PLL_ENABLE(tc_port)); + if (!(val & PLL_ENABLE)) + goto out; + + /* + * All registers read here have the same HIP_INDEX_REG even though + * they are on different building blocks + */ + I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, 0x2)); + + hw_state->mg_refclkin_ctl = I915_READ(DKL_REFCLKIN_CTL(tc_port)); + hw_state->mg_refclkin_ctl &= MG_REFCLKIN_CTL_OD_2_MUX_MASK; + + hw_state->mg_clktop2_hsclkctl = + I915_READ(DKL_CLKTOP2_HSCLKCTL(tc_port)); + hw_state->mg_clktop2_hsclkctl &= + MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL_MASK | + MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL_MASK | + MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO_MASK | + MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_MASK; + + hw_state->mg_clktop2_coreclkctl1 = + I915_READ(DKL_CLKTOP2_CORECLKCTL1(tc_port)); + hw_state->mg_clktop2_coreclkctl1 &= + MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK; + + hw_state->mg_pll_div0 = I915_READ(DKL_PLL_DIV0(tc_port)); + hw_state->mg_pll_div0 &= (DKL_PLL_DIV0_INTEG_COEFF_MASK | + DKL_PLL_DIV0_PROP_COEFF_MASK | + DKL_PLL_DIV0_FBPREDIV_MASK | + DKL_PLL_DIV0_FBDIV_INT_MASK); + + hw_state->mg_pll_div1 = I915_READ(DKL_PLL_DIV1(tc_port)); + hw_state->mg_pll_div1 &= (DKL_PLL_DIV1_IREF_TRIM_MASK | + DKL_PLL_DIV1_TDC_TARGET_CNT_MASK); + + hw_state->mg_pll_ssc = I915_READ(DKL_PLL_SSC(tc_port)); + hw_state->mg_pll_ssc &= (DKL_PLL_SSC_IREF_NDIV_RATIO_MASK | + DKL_PLL_SSC_STEP_LEN_MASK | + DKL_PLL_SSC_STEP_NUM_MASK | + DKL_PLL_SSC_EN); + + hw_state->mg_pll_bias = I915_READ(DKL_PLL_BIAS(tc_port)); + hw_state->mg_pll_bias &= (DKL_PLL_BIAS_FRAC_EN_H | + DKL_PLL_BIAS_FBDIV_FRAC_MASK); + + hw_state->mg_pll_tdc_coldst_bias = + I915_READ(DKL_PLL_TDC_COLDST_BIAS(tc_port)); + hw_state->mg_pll_tdc_coldst_bias &= (DKL_PLL_TDC_SSC_STEP_SIZE_MASK | + DKL_PLL_TDC_FEED_FWD_GAIN_MASK); + + ret = true; +out: + intel_display_power_put(dev_priv, POWER_DOMAIN_DISPLAY_CORE, wakeref); + return ret; +} + static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll, struct intel_dpll_hw_state *hw_state, @@ -3220,6 +3292,12 @@ static void icl_mg_pll_write(struct drm_i915_private *dev_priv, POSTING_READ(MG_PLL_TDC_COLDST_BIAS(tc_port)); } +static void dkl_pll_write(struct drm_i915_private *dev_priv, + struct intel_shared_dpll *pll) +{ + /* TODO */ +} + static void icl_pll_power_enable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll, i915_reg_t enable_reg) @@ -3312,7 +3390,10 @@ static void mg_pll_enable(struct drm_i915_private *dev_priv, icl_pll_power_enable(dev_priv, pll, enable_reg); - icl_mg_pll_write(dev_priv, pll); + if (INTEL_GEN(dev_priv) >= 12) + dkl_pll_write(dev_priv, pll); + else + icl_mg_pll_write(dev_priv, pll); /* * DVFS pre sequence would be here, but in our driver the cdclk code @@ -3467,11 +3548,22 @@ static const struct intel_dpll_mgr ehl_pll_mgr = { .dump_hw_state = icl_dump_hw_state, }; +static const struct intel_shared_dpll_funcs dkl_pll_funcs = { + .enable = mg_pll_enable, + .disable = mg_pll_disable, + .get_hw_state = dkl_pll_get_hw_state, +}; + static const struct dpll_info tgl_plls[] = { { "DPLL 0", &combo_pll_funcs, DPLL_ID_ICL_DPLL0, 0 }, { "DPLL 1", &combo_pll_funcs, DPLL_ID_ICL_DPLL1, 0 }, { "TBT PLL", &tbt_pll_funcs, DPLL_ID_ICL_TBTPLL, 0 }, - /* TODO: Add typeC plls */ + { "TC PLL 1", &dkl_pll_funcs, DPLL_ID_ICL_MGPLL1, 0 }, + { "TC PLL 2", &dkl_pll_funcs, DPLL_ID_ICL_MGPLL2, 0 }, + { "TC PLL 3", &dkl_pll_funcs, DPLL_ID_ICL_MGPLL3, 0 }, + { "TC PLL 4", &dkl_pll_funcs, DPLL_ID_ICL_MGPLL4, 0 }, + { "TC PLL 5", &dkl_pll_funcs, DPLL_ID_TGL_MGPLL5, 0 }, + { "TC PLL 6", &dkl_pll_funcs, DPLL_ID_TGL_MGPLL6, 0 }, { }, }; From e87b9b05104f5dc2d1529d2b8fb2574e332d43e5 Mon Sep 17 00:00:00 2001 From: Vandita Kulkarni Date: Tue, 24 Sep 2019 14:00:36 -0700 Subject: [PATCH 254/331] drm/i915/tgl: Add support for dkl pll write MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new function to write to dkl phy pll registers. As per the bspec all the registers are read modify write. Reviewed-by: Lucas De Marchi Signed-off-by: Vandita Kulkarni Signed-off-by: José Roberto de Souza Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20190924210040.142075-2-jose.souza@intel.com --- drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 65 ++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 1d56027be12c2..114116cdbf492 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -3295,7 +3295,70 @@ static void icl_mg_pll_write(struct drm_i915_private *dev_priv, static void dkl_pll_write(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { - /* TODO */ + struct intel_dpll_hw_state *hw_state = &pll->state.hw_state; + enum tc_port tc_port = icl_pll_id_to_tc_port(pll->info->id); + u32 val; + + /* + * All registers programmed here have the same HIP_INDEX_REG even + * though on different building block + */ + I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, 0x2)); + + /* All the registers are RMW */ + val = I915_READ(DKL_REFCLKIN_CTL(tc_port)); + val &= ~MG_REFCLKIN_CTL_OD_2_MUX_MASK; + val |= hw_state->mg_refclkin_ctl; + I915_WRITE(DKL_REFCLKIN_CTL(tc_port), val); + + val = I915_READ(DKL_CLKTOP2_CORECLKCTL1(tc_port)); + val &= ~MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK; + val |= hw_state->mg_clktop2_coreclkctl1; + I915_WRITE(DKL_CLKTOP2_CORECLKCTL1(tc_port), val); + + val = I915_READ(DKL_CLKTOP2_HSCLKCTL(tc_port)); + val &= ~(MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL_MASK | + MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL_MASK | + MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO_MASK | + MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_MASK); + val |= hw_state->mg_clktop2_hsclkctl; + I915_WRITE(DKL_CLKTOP2_HSCLKCTL(tc_port), val); + + val = I915_READ(DKL_PLL_DIV0(tc_port)); + val &= ~(DKL_PLL_DIV0_INTEG_COEFF_MASK | + DKL_PLL_DIV0_PROP_COEFF_MASK | + DKL_PLL_DIV0_FBPREDIV_MASK | + DKL_PLL_DIV0_FBDIV_INT_MASK); + val |= hw_state->mg_pll_div0; + I915_WRITE(DKL_PLL_DIV0(tc_port), val); + + val = I915_READ(DKL_PLL_DIV1(tc_port)); + val &= ~(DKL_PLL_DIV1_IREF_TRIM_MASK | + DKL_PLL_DIV1_TDC_TARGET_CNT_MASK); + val |= hw_state->mg_pll_div1; + I915_WRITE(DKL_PLL_DIV1(tc_port), val); + + val = I915_READ(DKL_PLL_SSC(tc_port)); + val &= ~(DKL_PLL_SSC_IREF_NDIV_RATIO_MASK | + DKL_PLL_SSC_STEP_LEN_MASK | + DKL_PLL_SSC_STEP_NUM_MASK | + DKL_PLL_SSC_EN); + val |= hw_state->mg_pll_ssc; + I915_WRITE(DKL_PLL_SSC(tc_port), val); + + val = I915_READ(DKL_PLL_BIAS(tc_port)); + val &= ~(DKL_PLL_BIAS_FRAC_EN_H | + DKL_PLL_BIAS_FBDIV_FRAC_MASK); + val |= hw_state->mg_pll_bias; + I915_WRITE(DKL_PLL_BIAS(tc_port), val); + + val = I915_READ(DKL_PLL_TDC_COLDST_BIAS(tc_port)); + val &= ~(DKL_PLL_TDC_SSC_STEP_SIZE_MASK | + DKL_PLL_TDC_FEED_FWD_GAIN_MASK); + val |= hw_state->mg_pll_tdc_coldst_bias; + I915_WRITE(DKL_PLL_TDC_COLDST_BIAS(tc_port), val); + + POSTING_READ(DKL_PLL_TDC_COLDST_BIAS(tc_port)); } static void icl_pll_power_enable(struct drm_i915_private *dev_priv, From 1a5c6aa43a3ae95bb4756c8a906a32519a6d291d Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 24 Sep 2019 14:00:38 -0700 Subject: [PATCH 255/331] drm/i915/tgl: re-indent code to prepare for DKL changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The final save operation into pll_state of the calculations done will be different for DKL PHY. Prepare for that by reindenting code so it's easier to check for correctness. This one has no change in behavior. Reviewed-by: Matt Roper Signed-off-by: Lucas De Marchi Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20190924210040.142075-4-jose.souza@intel.com --- drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 119 ++++++++++-------- 1 file changed, 66 insertions(+), 53 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 114116cdbf492..ce2dee38c9564 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -2786,60 +2786,73 @@ static bool icl_calc_mg_pll_state(struct intel_crtc_state *crtc_state, } ssc_steplog = 4; - pll_state->mg_pll_div0 = (m2div_rem > 0 ? MG_PLL_DIV0_FRACNEN_H : 0) | - MG_PLL_DIV0_FBDIV_FRAC(m2div_frac) | - MG_PLL_DIV0_FBDIV_INT(m2div_int); - - pll_state->mg_pll_div1 = MG_PLL_DIV1_IREF_NDIVRATIO(iref_ndiv) | - MG_PLL_DIV1_DITHER_DIV_2 | - MG_PLL_DIV1_NDIVRATIO(1) | - MG_PLL_DIV1_FBPREDIV(m1div); - - pll_state->mg_pll_lf = MG_PLL_LF_TDCTARGETCNT(tdc_targetcnt) | - MG_PLL_LF_AFCCNTSEL_512 | - MG_PLL_LF_GAINCTRL(1) | - MG_PLL_LF_INT_COEFF(int_coeff) | - MG_PLL_LF_PROP_COEFF(prop_coeff); - - pll_state->mg_pll_frac_lock = MG_PLL_FRAC_LOCK_TRUELOCK_CRIT_32 | - MG_PLL_FRAC_LOCK_EARLYLOCK_CRIT_32 | - MG_PLL_FRAC_LOCK_LOCKTHRESH(10) | - MG_PLL_FRAC_LOCK_DCODITHEREN | - MG_PLL_FRAC_LOCK_FEEDFWRDGAIN(feedfwgain); - if (use_ssc || m2div_rem > 0) - pll_state->mg_pll_frac_lock |= MG_PLL_FRAC_LOCK_FEEDFWRDCAL_EN; - - pll_state->mg_pll_ssc = (use_ssc ? MG_PLL_SSC_EN : 0) | - MG_PLL_SSC_TYPE(2) | - MG_PLL_SSC_STEPLENGTH(ssc_steplen) | - MG_PLL_SSC_STEPNUM(ssc_steplog) | - MG_PLL_SSC_FLLEN | - MG_PLL_SSC_STEPSIZE(ssc_stepsize); - - pll_state->mg_pll_tdc_coldst_bias = MG_PLL_TDC_COLDST_COLDSTART | - MG_PLL_TDC_COLDST_IREFINT_EN | - MG_PLL_TDC_COLDST_REFBIAS_START_PULSE_W(iref_pulse_w) | - MG_PLL_TDC_TDCOVCCORR_EN | - MG_PLL_TDC_TDCSEL(3); - - pll_state->mg_pll_bias = MG_PLL_BIAS_BIAS_GB_SEL(3) | - MG_PLL_BIAS_INIT_DCOAMP(0x3F) | - MG_PLL_BIAS_BIAS_BONUS(10) | - MG_PLL_BIAS_BIASCAL_EN | - MG_PLL_BIAS_CTRIM(12) | - MG_PLL_BIAS_VREF_RDAC(4) | - MG_PLL_BIAS_IREFTRIM(iref_trim); - - if (refclk_khz == 38400) { - pll_state->mg_pll_tdc_coldst_bias_mask = MG_PLL_TDC_COLDST_COLDSTART; - pll_state->mg_pll_bias_mask = 0; - } else { - pll_state->mg_pll_tdc_coldst_bias_mask = -1U; - pll_state->mg_pll_bias_mask = -1U; - } + /* write pll_state calculations */ + { + pll_state->mg_pll_div0 = + (m2div_rem > 0 ? MG_PLL_DIV0_FRACNEN_H : 0) | + MG_PLL_DIV0_FBDIV_FRAC(m2div_frac) | + MG_PLL_DIV0_FBDIV_INT(m2div_int); + + pll_state->mg_pll_div1 = + MG_PLL_DIV1_IREF_NDIVRATIO(iref_ndiv) | + MG_PLL_DIV1_DITHER_DIV_2 | + MG_PLL_DIV1_NDIVRATIO(1) | + MG_PLL_DIV1_FBPREDIV(m1div); + + pll_state->mg_pll_lf = + MG_PLL_LF_TDCTARGETCNT(tdc_targetcnt) | + MG_PLL_LF_AFCCNTSEL_512 | + MG_PLL_LF_GAINCTRL(1) | + MG_PLL_LF_INT_COEFF(int_coeff) | + MG_PLL_LF_PROP_COEFF(prop_coeff); + + pll_state->mg_pll_frac_lock = + MG_PLL_FRAC_LOCK_TRUELOCK_CRIT_32 | + MG_PLL_FRAC_LOCK_EARLYLOCK_CRIT_32 | + MG_PLL_FRAC_LOCK_LOCKTHRESH(10) | + MG_PLL_FRAC_LOCK_DCODITHEREN | + MG_PLL_FRAC_LOCK_FEEDFWRDGAIN(feedfwgain); + if (use_ssc || m2div_rem > 0) + pll_state->mg_pll_frac_lock |= + MG_PLL_FRAC_LOCK_FEEDFWRDCAL_EN; + + pll_state->mg_pll_ssc = + (use_ssc ? MG_PLL_SSC_EN : 0) | + MG_PLL_SSC_TYPE(2) | + MG_PLL_SSC_STEPLENGTH(ssc_steplen) | + MG_PLL_SSC_STEPNUM(ssc_steplog) | + MG_PLL_SSC_FLLEN | + MG_PLL_SSC_STEPSIZE(ssc_stepsize); + + pll_state->mg_pll_tdc_coldst_bias = + MG_PLL_TDC_COLDST_COLDSTART | + MG_PLL_TDC_COLDST_IREFINT_EN | + MG_PLL_TDC_COLDST_REFBIAS_START_PULSE_W(iref_pulse_w) | + MG_PLL_TDC_TDCOVCCORR_EN | + MG_PLL_TDC_TDCSEL(3); + + pll_state->mg_pll_bias = + MG_PLL_BIAS_BIAS_GB_SEL(3) | + MG_PLL_BIAS_INIT_DCOAMP(0x3F) | + MG_PLL_BIAS_BIAS_BONUS(10) | + MG_PLL_BIAS_BIASCAL_EN | + MG_PLL_BIAS_CTRIM(12) | + MG_PLL_BIAS_VREF_RDAC(4) | + MG_PLL_BIAS_IREFTRIM(iref_trim); + + if (refclk_khz == 38400) { + pll_state->mg_pll_tdc_coldst_bias_mask = + MG_PLL_TDC_COLDST_COLDSTART; + pll_state->mg_pll_bias_mask = 0; + } else { + pll_state->mg_pll_tdc_coldst_bias_mask = -1U; + pll_state->mg_pll_bias_mask = -1U; + } - pll_state->mg_pll_tdc_coldst_bias &= pll_state->mg_pll_tdc_coldst_bias_mask; - pll_state->mg_pll_bias &= pll_state->mg_pll_bias_mask; + pll_state->mg_pll_tdc_coldst_bias &= + pll_state->mg_pll_tdc_coldst_bias_mask; + pll_state->mg_pll_bias &= pll_state->mg_pll_bias_mask; + } return true; } From ee7de6ad382d8d0fcf5bdb7443c7bc1f55e9d0f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Tue, 24 Sep 2019 14:00:39 -0700 Subject: [PATCH 256/331] drm/i915/tgl: Add dkl phy pll calculations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extending ICL mg calculations to also support dkl calculations. v3: Fixing iref_trim calculation for 38400 refclock BSpec: 49204 Reviewed-by: Lucas De Marchi Signed-off-by: Vandita Kulkarni Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20190924210040.142075-5-jose.souza@intel.com --- drivers/gpu/drm/i915/display/intel_ddi.c | 29 +++++++++--- drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 45 ++++++++++++++++--- 2 files changed, 62 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 14fe987888a67..c06353ae0d120 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -1413,11 +1413,30 @@ static int icl_calc_mg_pll_link(struct drm_i915_private *dev_priv, ref_clock = dev_priv->cdclk.hw.ref; - m1 = pll_state->mg_pll_div1 & MG_PLL_DIV1_FBPREDIV_MASK; - m2_int = pll_state->mg_pll_div0 & MG_PLL_DIV0_FBDIV_INT_MASK; - m2_frac = (pll_state->mg_pll_div0 & MG_PLL_DIV0_FRACNEN_H) ? - (pll_state->mg_pll_div0 & MG_PLL_DIV0_FBDIV_FRAC_MASK) >> - MG_PLL_DIV0_FBDIV_FRAC_SHIFT : 0; + if (INTEL_GEN(dev_priv) >= 12) { + m1 = pll_state->mg_pll_div0 & DKL_PLL_DIV0_FBPREDIV_MASK; + m1 = m1 >> DKL_PLL_DIV0_FBPREDIV_SHIFT; + m2_int = pll_state->mg_pll_div0 & DKL_PLL_DIV0_FBDIV_INT_MASK; + + if (pll_state->mg_pll_bias & DKL_PLL_BIAS_FRAC_EN_H) { + m2_frac = pll_state->mg_pll_bias & + DKL_PLL_BIAS_FBDIV_FRAC_MASK; + m2_frac = m2_frac >> DKL_PLL_BIAS_FBDIV_SHIFT; + } else { + m2_frac = 0; + } + } else { + m1 = pll_state->mg_pll_div1 & MG_PLL_DIV1_FBPREDIV_MASK; + m2_int = pll_state->mg_pll_div0 & MG_PLL_DIV0_FBDIV_INT_MASK; + + if (pll_state->mg_pll_div0 & MG_PLL_DIV0_FRACNEN_H) { + m2_frac = pll_state->mg_pll_div0 & + MG_PLL_DIV0_FBDIV_FRAC_MASK; + m2_frac = m2_frac >> MG_PLL_DIV0_FBDIV_FRAC_SHIFT; + } else { + m2_frac = 0; + } + } switch (pll_state->mg_clktop2_hsclkctl & MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO_MASK) { diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index ce2dee38c9564..69abafa45ce92 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -2607,7 +2607,8 @@ enum intel_dpll_id icl_tc_port_to_pll_id(enum tc_port tc_port) static bool icl_mg_pll_find_divisors(int clock_khz, bool is_dp, bool use_ssc, u32 *target_dco_khz, - struct intel_dpll_hw_state *state) + struct intel_dpll_hw_state *state, + bool is_dkl) { u32 dco_min_freq, dco_max_freq; int div1_vals[] = {7, 5, 3, 2}; @@ -2629,8 +2630,13 @@ static bool icl_mg_pll_find_divisors(int clock_khz, bool is_dp, bool use_ssc, continue; if (div2 >= 2) { - a_divratio = is_dp ? 10 : 5; - tlinedrv = 2; + if (is_dkl) { + a_divratio = 5; + tlinedrv = 1; + } else { + a_divratio = is_dp ? 10 : 5; + tlinedrv = 2; + } } else { a_divratio = 5; tlinedrv = 0; @@ -2693,11 +2699,12 @@ static bool icl_calc_mg_pll_state(struct intel_crtc_state *crtc_state, u64 tmp; bool use_ssc = false; bool is_dp = !intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI); + bool is_dkl = INTEL_GEN(dev_priv) >= 12; memset(pll_state, 0, sizeof(*pll_state)); if (!icl_mg_pll_find_divisors(clock, is_dp, use_ssc, &dco_khz, - pll_state)) { + pll_state, is_dkl)) { DRM_DEBUG_KMS("Failed to find divisors for clock %d\n", clock); return false; } @@ -2705,8 +2712,11 @@ static bool icl_calc_mg_pll_state(struct intel_crtc_state *crtc_state, m1div = 2; m2div_int = dco_khz / (refclk_khz * m1div); if (m2div_int > 255) { - m1div = 4; - m2div_int = dco_khz / (refclk_khz * m1div); + if (!is_dkl) { + m1div = 4; + m2div_int = dco_khz / (refclk_khz * m1div); + } + if (m2div_int > 255) { DRM_DEBUG_KMS("Failed to find mdiv for clock %d\n", clock); @@ -2787,7 +2797,28 @@ static bool icl_calc_mg_pll_state(struct intel_crtc_state *crtc_state, ssc_steplog = 4; /* write pll_state calculations */ - { + if (is_dkl) { + pll_state->mg_pll_div0 = DKL_PLL_DIV0_INTEG_COEFF(int_coeff) | + DKL_PLL_DIV0_PROP_COEFF(prop_coeff) | + DKL_PLL_DIV0_FBPREDIV(m1div) | + DKL_PLL_DIV0_FBDIV_INT(m2div_int); + + pll_state->mg_pll_div1 = DKL_PLL_DIV1_IREF_TRIM(iref_trim) | + DKL_PLL_DIV1_TDC_TARGET_CNT(tdc_targetcnt); + + pll_state->mg_pll_ssc = DKL_PLL_SSC_IREF_NDIV_RATIO(iref_ndiv) | + DKL_PLL_SSC_STEP_LEN(ssc_steplen) | + DKL_PLL_SSC_STEP_NUM(ssc_steplog) | + (use_ssc ? DKL_PLL_SSC_EN : 0); + + pll_state->mg_pll_bias = (m2div_frac ? DKL_PLL_BIAS_FRAC_EN_H : 0) | + DKL_PLL_BIAS_FBDIV_FRAC(m2div_frac); + + pll_state->mg_pll_tdc_coldst_bias = + DKL_PLL_TDC_SSC_STEP_SIZE(ssc_stepsize) | + DKL_PLL_TDC_FEED_FWD_GAIN(feedfwgain); + + } else { pll_state->mg_pll_div0 = (m2div_rem > 0 ? MG_PLL_DIV0_FRACNEN_H : 0) | MG_PLL_DIV0_FBDIV_FRAC(m2div_frac) | From 6677c3b167b3ba9fa183015f6244e2db28d99f29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Tue, 24 Sep 2019 14:00:40 -0700 Subject: [PATCH 257/331] drm/i915/tgl: Return the mg/dkl pll as DDI clock for new TC ports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TGL added 2 more TC ports that currently are not being handled by icl_pll_to_ddi_clk_sel(), so adding those. Reviewed-by: Lucas De Marchi Cc: Lucas De Marchi Cc: Imre Deak Reported-by: Imre Deak Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20190924210040.142075-6-jose.souza@intel.com --- drivers/gpu/drm/i915/display/intel_ddi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index c06353ae0d120..aa470c70a1989 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -1049,6 +1049,8 @@ static u32 icl_pll_to_ddi_clk_sel(struct intel_encoder *encoder, case DPLL_ID_ICL_MGPLL2: case DPLL_ID_ICL_MGPLL3: case DPLL_ID_ICL_MGPLL4: + case DPLL_ID_TGL_MGPLL5: + case DPLL_ID_TGL_MGPLL6: return DDI_CLK_SEL_MG; } } From c1f2b8124bdf2d7812a759f83a36a6d6178b5c94 Mon Sep 17 00:00:00 2001 From: James Ausmus Date: Tue, 24 Sep 2019 15:28:29 -0700 Subject: [PATCH 258/331] drm/i915/tgl: Add memory type decoding for bandwidth checking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The memory type values have changed in TGL, so we need to translate them differently than ICL. While we're moving it, fix up the ICL translation for LPDDR4. BSpec: 53998 v2: Fix up ICL LPDDR4 entry (Ville); Drop unused values from TGL (Ville) Cc: Ville Syrjälä Cc: Stanislav Lisovskiy Signed-off-by: James Ausmus Reviewed-by: Ville Syrjälä Reviewed-by: Stuart Summers Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20190924222829.13142-1-james.ausmus@intel.com --- drivers/gpu/drm/i915/display/intel_bw.c | 55 ++++++++++++++++++------- 1 file changed, 39 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index cd58e47ab7b2a..22e83f857de83 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -35,22 +35,45 @@ static int icl_pcode_read_mem_global_info(struct drm_i915_private *dev_priv, if (ret) return ret; - switch (val & 0xf) { - case 0: - qi->dram_type = INTEL_DRAM_DDR4; - break; - case 1: - qi->dram_type = INTEL_DRAM_DDR3; - break; - case 2: - qi->dram_type = INTEL_DRAM_LPDDR3; - break; - case 3: - qi->dram_type = INTEL_DRAM_LPDDR3; - break; - default: - MISSING_CASE(val & 0xf); - break; + if (IS_GEN(dev_priv, 12)) { + switch (val & 0xf) { + case 0: + qi->dram_type = INTEL_DRAM_DDR4; + break; + case 3: + qi->dram_type = INTEL_DRAM_LPDDR4; + break; + case 4: + qi->dram_type = INTEL_DRAM_DDR3; + break; + case 5: + qi->dram_type = INTEL_DRAM_LPDDR3; + break; + default: + MISSING_CASE(val & 0xf); + break; + } + } else if (IS_GEN(dev_priv, 11)) { + switch (val & 0xf) { + case 0: + qi->dram_type = INTEL_DRAM_DDR4; + break; + case 1: + qi->dram_type = INTEL_DRAM_DDR3; + break; + case 2: + qi->dram_type = INTEL_DRAM_LPDDR3; + break; + case 3: + qi->dram_type = INTEL_DRAM_LPDDR4; + break; + default: + MISSING_CASE(val & 0xf); + break; + } + } else { + MISSING_CASE(INTEL_GEN(dev_priv)); + qi->dram_type = INTEL_DRAM_LPDDR3; /* Conservative default */ } qi->num_channels = (val & 0xf0) >> 4; From 45d3c5cd5233258ffa9326b33ae4af59fb127dfb Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 25 Sep 2019 16:45:42 -0700 Subject: [PATCH 259/331] drm/i915: Small joiner RAM buffer size is platform-specific According to the bspec, GLK/CNL have a smaller small joiner RAM buffer than ICL+. This feels like something that could easily change again on future platforms, so let's just add a function to return the proper per-platform buffer size. That may also slightly simplify the upcoming bigjoiner enabling. Since we have to change intel_dp_dsc_get_output_bpp()'s signature to pass the dev_priv down for the platform check, let's take the opportunity to also make that function static since it isn't used outside the intel_dp file. v2: Minor rebase on top of Maarten's changes. Bspec: 20388 Bspec: 49259 Cc: Manasi Navare Cc: Maarten Lankhorst Signed-off-by: Matt Roper Reviewed-by: Manasi Navare Link: https://patchwork.freedesktop.org/patch/msgid/20190925234542.24289-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/display/intel_dp.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 893c9807755cc..14a5c661f7103 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -68,9 +68,6 @@ #define DP_DPRX_ESI_LEN 14 -/* DP DSC small joiner has 2 FIFOs each of 640 x 6 bytes */ -#define DP_DSC_MAX_SMALL_JOINER_RAM_BUFFER 61440 - /* DP DSC throughput values used for slice count calculations KPixels/s */ #define DP_DSC_PEAK_PIXEL_RATE 2720000 #define DP_DSC_MAX_ENC_THROUGHPUT_0 340000 @@ -498,7 +495,17 @@ u32 intel_dp_mode_to_fec_clock(u32 mode_clock) DP_DSC_FEC_OVERHEAD_FACTOR); } -static u16 intel_dp_dsc_get_output_bpp(u32 link_clock, u32 lane_count, +static int +small_joiner_ram_size_bits(struct drm_i915_private *i915) +{ + if (INTEL_GEN(i915) >= 11) + return 7680 * 8; + else + return 6144 * 8; +} + +static u16 intel_dp_dsc_get_output_bpp(struct drm_i915_private *i915, + u32 link_clock, u32 lane_count, u32 mode_clock, u32 mode_hdisplay) { u32 bits_per_pixel, max_bpp_small_joiner_ram; @@ -515,7 +522,8 @@ static u16 intel_dp_dsc_get_output_bpp(u32 link_clock, u32 lane_count, DRM_DEBUG_KMS("Max link bpp: %u\n", bits_per_pixel); /* Small Joiner Check: output bpp <= joiner RAM (bits) / Horiz. width */ - max_bpp_small_joiner_ram = DP_DSC_MAX_SMALL_JOINER_RAM_BUFFER / mode_hdisplay; + max_bpp_small_joiner_ram = small_joiner_ram_size_bits(i915) / + mode_hdisplay; DRM_DEBUG_KMS("Max small joiner bpp: %u\n", max_bpp_small_joiner_ram); /* @@ -632,7 +640,8 @@ intel_dp_mode_valid(struct drm_connector *connector, true); } else if (drm_dp_sink_supports_fec(intel_dp->fec_capable)) { dsc_max_output_bpp = - intel_dp_dsc_get_output_bpp(max_link_clock, + intel_dp_dsc_get_output_bpp(dev_priv, + max_link_clock, max_lanes, target_clock, mode->hdisplay) >> 4; @@ -2059,7 +2068,8 @@ static int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, u8 dsc_dp_slice_count; dsc_max_output_bpp = - intel_dp_dsc_get_output_bpp(pipe_config->port_clock, + intel_dp_dsc_get_output_bpp(dev_priv, + pipe_config->port_clock, pipe_config->lane_count, adjusted_mode->crtc_clock, adjusted_mode->crtc_hdisplay); From 5311f5171e98e6958fe5cf1c424a6196115baeb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Thu, 26 Sep 2019 14:31:40 +0100 Subject: [PATCH 260/331] drm/i915: Define explicit wedged on init reset state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We're currently using scratch presence as a way of identifying that we entered wedged state at driver initialization time. Let's use a separate flag rather than rely on scratch. Signed-off-by: Michał Winiarski Cc: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190926133142.2838-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_reset.c | 11 ++++++++++- drivers/gpu/drm/i915/gt/intel_reset.h | 9 +++++++++ drivers/gpu/drm/i915/gt/intel_reset_types.h | 6 ++++++ drivers/gpu/drm/i915/i915_gem.c | 2 +- 4 files changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index ae68c3786f63d..ec978b4d1be34 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -811,7 +811,8 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) if (!test_bit(I915_WEDGED, >->reset.flags)) return true; - if (!gt->scratch) /* Never full initialised, recovery impossible */ + /* Never fully initialised, recovery impossible */ + if (test_bit(I915_WEDGED_ON_INIT, >->reset.flags)) return false; GEM_TRACE("start\n"); @@ -1279,6 +1280,14 @@ int intel_gt_terminally_wedged(struct intel_gt *gt) return intel_gt_is_wedged(gt) ? -EIO : 0; } +void intel_gt_set_wedged_on_init(struct intel_gt *gt) +{ + BUILD_BUG_ON(I915_RESET_ENGINE + I915_NUM_ENGINES > + I915_WEDGED_ON_INIT); + intel_gt_set_wedged(gt); + set_bit(I915_WEDGED_ON_INIT, >->reset.flags); +} + void intel_gt_init_reset(struct intel_gt *gt) { init_waitqueue_head(>->reset.queue); diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h index 52c00199e0694..0b6ff1ee7f063 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.h +++ b/drivers/gpu/drm/i915/gt/intel_reset.h @@ -45,6 +45,12 @@ void intel_gt_set_wedged(struct intel_gt *gt); bool intel_gt_unset_wedged(struct intel_gt *gt); int intel_gt_terminally_wedged(struct intel_gt *gt); +/* + * There's no unset_wedged_on_init paired with this one. + * Once we're wedged on init, there's no going back. + */ +void intel_gt_set_wedged_on_init(struct intel_gt *gt); + int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask); int intel_reset_guc(struct intel_gt *gt); @@ -68,6 +74,9 @@ void __intel_fini_wedge(struct intel_wedge_me *w); static inline bool __intel_reset_failed(const struct intel_reset *reset) { + GEM_BUG_ON(test_bit(I915_WEDGED_ON_INIT, &reset->flags) ? + !test_bit(I915_WEDGED, &reset->flags) : false); + return unlikely(test_bit(I915_WEDGED, &reset->flags)); } diff --git a/drivers/gpu/drm/i915/gt/intel_reset_types.h b/drivers/gpu/drm/i915/gt/intel_reset_types.h index 31968356e0c01..f43bc3a0fe4fe 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset_types.h +++ b/drivers/gpu/drm/i915/gt/intel_reset_types.h @@ -29,11 +29,17 @@ struct intel_reset { * we set the #I915_WEDGED bit. Prior to command submission, e.g. * i915_request_alloc(), this bit is checked and the sequence * aborted (with -EIO reported to userspace) if set. + * + * #I915_WEDGED_ON_INIT - If we fail to initialize the GPU we can no + * longer use the GPU - similar to #I915_WEDGED bit. The difference in + * in the way we're handling "forced" unwedged (e.g. through debugfs), + * which is not allowed in case we failed to initialize. */ unsigned long flags; #define I915_RESET_BACKOFF 0 #define I915_RESET_MODESET 1 #define I915_RESET_ENGINE 2 +#define I915_WEDGED_ON_INIT (BITS_PER_LONG - 2) #define I915_WEDGED (BITS_PER_LONG - 1) struct mutex mutex; /* serialises wedging/unwedging */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2da9544fa9a4e..e2897a6662258 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1411,7 +1411,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) err_gt: mutex_unlock(&dev_priv->drm.struct_mutex); - intel_gt_set_wedged(&dev_priv->gt); + intel_gt_set_wedged_on_init(&dev_priv->gt); i915_gem_suspend(dev_priv); i915_gem_suspend_late(dev_priv); From e1237523749e342d9ffb499a636ab6860a554cba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Thu, 26 Sep 2019 14:31:41 +0100 Subject: [PATCH 261/331] drm/i915/execlists: Use per-process HWSP as scratch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some of our commands (MI_FLUSH_DW / PIPE_CONTROL) require a post-sync write operation to be performed. Currently we're using dedicated VMA for PIPE_CONTROL and global HWSP for MI_FLUSH_DW. On execlists platforms, each of our contexts has an area that can be used as scratch space. Let's use that instead. Signed-off-by: Michał Winiarski Cc: Chris Wilson Cc: Daniele Ceraolo Spurio Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190926133142.2838-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_gt_types.h | 3 -- drivers/gpu/drm/i915/gt/intel_lrc.c | 45 +++++++----------------- drivers/gpu/drm/i915/gt/intel_lrc.h | 4 +++ 3 files changed, 17 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index 3039cef64b112..e64db4c13df64 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -89,9 +89,6 @@ enum intel_gt_scratch_field { /* 8 bytes */ INTEL_GT_SCRATCH_FIELD_DEFAULT = 0, - /* 8 bytes */ - INTEL_GT_SCRATCH_FIELD_CLEAR_SLM_WA = 128, - /* 8 bytes */ INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH = 128, diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index ab725a6ca0ac4..fa385218ce925 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -2308,12 +2308,6 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch) return batch; } -static u32 slm_offset(struct intel_engine_cs *engine) -{ - return intel_gt_scratch_offset(engine->gt, - INTEL_GT_SCRATCH_FIELD_CLEAR_SLM_WA); -} - /* * Typically we only have one indirect_ctx and per_ctx batch buffer which are * initialized at the beginning and shared across all contexts but this field @@ -2342,10 +2336,10 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) /* Actual scratch location is at 128 bytes offset */ batch = gen8_emit_pipe_control(batch, PIPE_CONTROL_FLUSH_L3 | - PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_STORE_DATA_INDEX | PIPE_CONTROL_CS_STALL | PIPE_CONTROL_QW_WRITE, - slm_offset(engine)); + LRC_PPHWSP_SCRATCH_ADDR); *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; @@ -3052,7 +3046,7 @@ static int gen8_emit_flush(struct i915_request *request, u32 mode) } *cs++ = cmd; - *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; + *cs++ = LRC_PPHWSP_SCRATCH_ADDR; *cs++ = 0; /* upper addr */ *cs++ = 0; /* value */ intel_ring_advance(request, cs); @@ -3063,10 +3057,6 @@ static int gen8_emit_flush(struct i915_request *request, u32 mode) static int gen8_emit_flush_render(struct i915_request *request, u32 mode) { - struct intel_engine_cs *engine = request->engine; - u32 scratch_addr = - intel_gt_scratch_offset(engine->gt, - INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH); bool vf_flush_wa = false, dc_flush_wa = false; u32 *cs, flags = 0; int len; @@ -3088,7 +3078,7 @@ static int gen8_emit_flush_render(struct i915_request *request, flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; flags |= PIPE_CONTROL_QW_WRITE; - flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + flags |= PIPE_CONTROL_STORE_DATA_INDEX; /* * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL @@ -3121,7 +3111,7 @@ static int gen8_emit_flush_render(struct i915_request *request, cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE, 0); - cs = gen8_emit_pipe_control(cs, flags, scratch_addr); + cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); if (dc_flush_wa) cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0); @@ -3134,11 +3124,6 @@ static int gen8_emit_flush_render(struct i915_request *request, static int gen11_emit_flush_render(struct i915_request *request, u32 mode) { - struct intel_engine_cs *engine = request->engine; - const u32 scratch_addr = - intel_gt_scratch_offset(engine->gt, - INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH); - if (mode & EMIT_FLUSH) { u32 *cs; u32 flags = 0; @@ -3151,13 +3136,13 @@ static int gen11_emit_flush_render(struct i915_request *request, flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; flags |= PIPE_CONTROL_FLUSH_ENABLE; flags |= PIPE_CONTROL_QW_WRITE; - flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + flags |= PIPE_CONTROL_STORE_DATA_INDEX; cs = intel_ring_begin(request, 6); if (IS_ERR(cs)) return PTR_ERR(cs); - cs = gen8_emit_pipe_control(cs, flags, scratch_addr); + cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); intel_ring_advance(request, cs); } @@ -3175,13 +3160,13 @@ static int gen11_emit_flush_render(struct i915_request *request, flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; flags |= PIPE_CONTROL_QW_WRITE; - flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + flags |= PIPE_CONTROL_STORE_DATA_INDEX; cs = intel_ring_begin(request, 6); if (IS_ERR(cs)) return PTR_ERR(cs); - cs = gen8_emit_pipe_control(cs, flags, scratch_addr); + cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); intel_ring_advance(request, cs); } @@ -3196,10 +3181,6 @@ static u32 preparser_disable(bool state) static int gen12_emit_flush_render(struct i915_request *request, u32 mode) { - const u32 scratch_addr = - intel_gt_scratch_offset(request->engine->gt, - INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH); - if (mode & EMIT_FLUSH) { u32 flags = 0; u32 *cs; @@ -3210,7 +3191,7 @@ static int gen12_emit_flush_render(struct i915_request *request, flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; flags |= PIPE_CONTROL_FLUSH_ENABLE; - flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + flags |= PIPE_CONTROL_STORE_DATA_INDEX; flags |= PIPE_CONTROL_QW_WRITE; flags |= PIPE_CONTROL_CS_STALL; @@ -3219,7 +3200,7 @@ static int gen12_emit_flush_render(struct i915_request *request, if (IS_ERR(cs)) return PTR_ERR(cs); - cs = gen8_emit_pipe_control(cs, flags, scratch_addr); + cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); intel_ring_advance(request, cs); } @@ -3235,7 +3216,7 @@ static int gen12_emit_flush_render(struct i915_request *request, flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + flags |= PIPE_CONTROL_STORE_DATA_INDEX; flags |= PIPE_CONTROL_QW_WRITE; flags |= PIPE_CONTROL_CS_STALL; @@ -3251,7 +3232,7 @@ static int gen12_emit_flush_render(struct i915_request *request, */ *cs++ = preparser_disable(true); - cs = gen8_emit_pipe_control(cs, flags, scratch_addr); + cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); *cs++ = preparser_disable(false); intel_ring_advance(request, cs); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h index dc0252e0589e4..66ac616361c1f 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -104,6 +104,10 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine); */ #define LRC_HEADER_PAGES LRC_PPHWSP_PN +/* Space within PPHWSP reserved to be used as scratch */ +#define LRC_PPHWSP_SCRATCH 0x34 +#define LRC_PPHWSP_SCRATCH_ADDR (LRC_PPHWSP_SCRATCH * sizeof(u32)) + void intel_execlists_set_default_submission(struct intel_engine_cs *engine); void intel_lr_context_reset(struct intel_engine_cs *engine, From 7d5255e0ced46a10131175bf65789a0c1a7806a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Thu, 26 Sep 2019 14:31:42 +0100 Subject: [PATCH 262/331] drm/i915: Adjust length of MI_LOAD_REGISTER_REG MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Default length value of MI_LOAD_REGISTER_REG is 1. Also move it out of cmd-parser-only registers since we're going to use it in i915. Signed-off-by: Michał Winiarski Cc: Chris Wilson Cc: Jani Nikula Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190926133142.2838-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index f78b13d74e17a..9211b1ad401bb 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -152,6 +152,7 @@ #define MI_FLUSH_DW_USE_PPGTT (0<<2) #define MI_LOAD_REGISTER_MEM MI_INSTR(0x29, 1) #define MI_LOAD_REGISTER_MEM_GEN8 MI_INSTR(0x29, 2) +#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 1) #define MI_BATCH_BUFFER MI_INSTR(0x30, 1) #define MI_BATCH_NON_SECURE (1) /* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */ @@ -256,7 +257,6 @@ #define MI_CLFLUSH MI_INSTR(0x27, 0) #define MI_REPORT_PERF_COUNT MI_INSTR(0x28, 0) #define MI_REPORT_PERF_COUNT_GGTT (1<<0) -#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 0) #define MI_RS_STORE_DATA_IMM MI_INSTR(0x2B, 0) #define MI_LOAD_URB_MEM MI_INSTR(0x2C, 0) #define MI_STORE_URB_MEM MI_INSTR(0x2D, 0) From 132dfc78d3eb51b6fc8e667183b3572d41e3fe9f Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 26 Sep 2019 12:56:44 +0200 Subject: [PATCH 263/331] drm/i915: Drop the IRQ-off asserts The lockdep_assert_irqs_disabled() check is needless. The previous lockdep_assert_held() check ensures that the lock is acquired and while the lock is acquired lockdep also prints a warning if the interrupts are not disabled if they have to be. These IRQ-off asserts trigger on PREEMPT_RT because the locks become sleeping locks and do not really disable interrupts. Remove lockdep_assert_irqs_disabled(). Reported-by: Clark Williams Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190926105644.16703-3-bigeasy@linutronix.de --- drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index 09c68dda20988..0c6a1b11e2594 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -120,7 +120,6 @@ __dma_fence_signal__notify(struct dma_fence *fence, struct dma_fence_cb *cur, *tmp; lockdep_assert_held(fence->lock); - lockdep_assert_irqs_disabled(); list_for_each_entry_safe(cur, tmp, list, node) { INIT_LIST_HEAD(&cur->node); @@ -275,7 +274,6 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) bool i915_request_enable_breadcrumb(struct i915_request *rq) { lockdep_assert_held(&rq->lock); - lockdep_assert_irqs_disabled(); if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) { struct intel_breadcrumbs *b = &rq->engine->breadcrumbs; @@ -325,7 +323,6 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq) struct intel_breadcrumbs *b = &rq->engine->breadcrumbs; lockdep_assert_held(&rq->lock); - lockdep_assert_irqs_disabled(); /* * We must wait for b->irq_lock so that we know the interrupt handler From e3792238c1dde5059e44e0f14899497ac875443d Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 26 Sep 2019 12:56:43 +0200 Subject: [PATCH 264/331] drm/i915: Don't disable interrupts for intel_engine_breadcrumbs_irq() The function intel_engine_breadcrumbs_irq() is always invoked from an interrupt handler and for that reason it invokes (as an optimisation) only spin_lock() for locking assuming that the interrupts are already disabled. The function intel_engine_signal_breadcrumbs() is provided to disable interrupts while the former function is invoked so that assumption is also true for callers from preemptible context. On PREEMPT_RT local_irq_disable() really disables interrupts and this forbids to invoke spin_lock() which becomes a sleeping spinlock. This is also problematic with `threadirqs' in conjunction with irq_work. With force threading the interrupt handler, the handler is invoked with disabled BH but with interrupts enabled. This is okay and the lock itself is never acquired in IRQ context. This changes with irq_work (signal_irq_work()) which _still_ invokes intel_engine_breadcrumbs_irq() from IRQ context. Lockdep should see this and complain. Acquire the locks in intel_engine_breadcrumbs_irq() with _irqsave() suffix and let all callers invoke intel_engine_breadcrumbs_irq() directly instead using intel_engine_signal_breadcrumbs(). Reported-by: Clark Williams Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190926105644.16703-2-bigeasy@linutronix.de --- drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 16 +++++----------- drivers/gpu/drm/i915/gt/intel_engine.h | 1 - drivers/gpu/drm/i915/gt/intel_hangcheck.c | 2 +- drivers/gpu/drm/i915/gt/intel_reset.c | 2 +- 4 files changed, 7 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index 0c6a1b11e2594..55317081d48b0 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -133,9 +133,10 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine) const ktime_t timestamp = ktime_get(); struct intel_context *ce, *cn; struct list_head *pos, *next; + unsigned long flags; LIST_HEAD(signal); - spin_lock(&b->irq_lock); + spin_lock_irqsave(&b->irq_lock, flags); if (b->irq_armed && list_empty(&b->signalers)) __intel_breadcrumbs_disarm_irq(b); @@ -181,30 +182,23 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine) } } - spin_unlock(&b->irq_lock); + spin_unlock_irqrestore(&b->irq_lock, flags); list_for_each_safe(pos, next, &signal) { struct i915_request *rq = list_entry(pos, typeof(*rq), signal_link); struct list_head cb_list; - spin_lock(&rq->lock); + spin_lock_irqsave(&rq->lock, flags); list_replace(&rq->fence.cb_list, &cb_list); __dma_fence_signal__timestamp(&rq->fence, timestamp); __dma_fence_signal__notify(&rq->fence, &cb_list); - spin_unlock(&rq->lock); + spin_unlock_irqrestore(&rq->lock, flags); i915_request_put(rq); } } -void intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine) -{ - local_irq_disable(); - intel_engine_breadcrumbs_irq(engine); - local_irq_enable(); -} - static void signal_irq_work(struct irq_work *work) { struct intel_engine_cs *engine = diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index d3c6993f4f46f..c9e8c8ccbd47f 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -335,7 +335,6 @@ void intel_engine_init_execlists(struct intel_engine_cs *engine); void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); -void intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); static inline void diff --git a/drivers/gpu/drm/i915/gt/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c index 40f62f780be5c..9814b18b32adc 100644 --- a/drivers/gpu/drm/i915/gt/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/intel_hangcheck.c @@ -284,7 +284,7 @@ static void hangcheck_elapsed(struct work_struct *work) for_each_engine(engine, gt->i915, id) { struct hangcheck hc; - intel_engine_signal_breadcrumbs(engine); + intel_engine_breadcrumbs_irq(engine); hangcheck_load_sample(engine, &hc); hangcheck_accumulate_sample(engine, &hc); diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index ec978b4d1be34..d08226f5bea59 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -711,7 +711,7 @@ static void reset_finish_engine(struct intel_engine_cs *engine) engine->reset.finish(engine); intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL); - intel_engine_signal_breadcrumbs(engine); + intel_engine_breadcrumbs_irq(engine); } static void reset_finish(struct intel_gt *gt, intel_engine_mask_t awake) From 56316cbc9c986ce907bfcf4f3762f2494e44f439 Mon Sep 17 00:00:00 2001 From: Anna Karas Date: Thu, 26 Sep 2019 15:21:58 +0300 Subject: [PATCH 265/331] drm/i915/perf: Fix use of kernel-doc format in structure members Insert structure members names into their descriptions to follow kernel-doc format. Cc: Chris Wilson Signed-off-by: Anna Karas Reviewed-by: Chris Wilson Acked-by: Lionel Landwerlin Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190926122158.13028-1-anna.karas@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index fcf7423075ef1..b3c7dbc1832a0 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1134,7 +1134,7 @@ struct i915_perf_stream { struct i915_oa_config *oa_config; /** - * The OA context specific information. + * @pinned_ctx: The OA context specific information. */ struct intel_context *pinned_ctx; u32 specific_ctx_id; @@ -1148,7 +1148,7 @@ struct i915_perf_stream { int period_exponent; /** - * State of the OA buffer. + * @oa_buffer: State of the OA buffer. */ struct { struct i915_vma *vma; @@ -1159,7 +1159,7 @@ struct i915_perf_stream { int size_exponent; /** - * Locks reads and writes to all head/tail state + * @ptr_lock: Locks reads and writes to all head/tail state * * Consider: the head and tail pointer state needs to be read * consistently from a hrtimer callback (atomic context) and @@ -1181,7 +1181,7 @@ struct i915_perf_stream { spinlock_t ptr_lock; /** - * One 'aging' tail pointer and one 'aged' tail pointer ready to + * @tails: One 'aging' tail pointer and one 'aged' tail pointer ready to * used for reading. * * Initial values of 0xffffffff are invalid and imply that an @@ -1193,18 +1193,18 @@ struct i915_perf_stream { } tails[2]; /** - * Index for the aged tail ready to read() data up to. + * @aged_tail_idx: Index for the aged tail ready to read() data up to. */ unsigned int aged_tail_idx; /** - * A monotonic timestamp for when the current aging tail pointer + * @aging_timestamp: A monotonic timestamp for when the current aging tail pointer * was read; used to determine when it is old enough to trust. */ u64 aging_timestamp; /** - * Although we can always read back the head pointer register, + * @head: Although we can always read back the head pointer register, * we prefer to avoid trusting the HW state, just to avoid any * risk that some hardware condition could * somehow bump the * head pointer unpredictably and cause us to forward the wrong From 74b2089a105f43c33d2f2124bd03d1085fe7c9fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Thu, 26 Sep 2019 12:06:33 +0200 Subject: [PATCH 266/331] drm/i915: Add definitions for MI_MATH command MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can use it in i915 for updating parts of unmasked registers from within a batch. We're also adding Gen8+ versions of CS_GPR registers (aka MI_MATH_REG in the coprocessor). Signed-off-by: Michał Winiarski Cc: Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190926100635.9416-4-michal.winiarski@intel.com --- drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 23 ++++++++++++++++++++ drivers/gpu/drm/i915/i915_reg.h | 4 ++++ 2 files changed, 27 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index 9211b1ad401bb..b0227ab2fe1b1 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -241,6 +241,29 @@ #define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1<<0) #define PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */ +#define MI_MATH(x) MI_INSTR(0x1a, (x) - 1) +#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2)) +/* Opcodes for MI_MATH_INSTR */ +#define MI_MATH_NOOP MI_MATH_INSTR(0x000, 0x0, 0x0) +#define MI_MATH_LOAD(op1, op2) MI_MATH_INSTR(0x080, op1, op2) +#define MI_MATH_LOADINV(op1, op2) MI_MATH_INSTR(0x480, op1, op2) +#define MI_MATH_LOAD0(op1) MI_MATH_INSTR(0x081, op1) +#define MI_MATH_LOAD1(op1) MI_MATH_INSTR(0x481, op1) +#define MI_MATH_ADD MI_MATH_INSTR(0x100, 0x0, 0x0) +#define MI_MATH_SUB MI_MATH_INSTR(0x101, 0x0, 0x0) +#define MI_MATH_AND MI_MATH_INSTR(0x102, 0x0, 0x0) +#define MI_MATH_OR MI_MATH_INSTR(0x103, 0x0, 0x0) +#define MI_MATH_XOR MI_MATH_INSTR(0x104, 0x0, 0x0) +#define MI_MATH_STORE(op1, op2) MI_MATH_INSTR(0x180, op1, op2) +#define MI_MATH_STOREINV(op1, op2) MI_MATH_INSTR(0x580, op1, op2) +/* Registers used as operands in MI_MATH_INSTR */ +#define MI_MATH_REG(x) (x) +#define MI_MATH_REG_SRCA 0x20 +#define MI_MATH_REG_SRCB 0x21 +#define MI_MATH_REG_ACCU 0x31 +#define MI_MATH_REG_ZF 0x32 +#define MI_MATH_REG_CF 0x33 + /* * Commands used only by the command parser */ diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index e752de9470bd0..5e30f1a146240 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2483,6 +2483,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define RING_WAIT (1 << 11) /* gen3+, PRBx_CTL */ #define RING_WAIT_SEMAPHORE (1 << 10) /* gen6+ */ +/* There are 16 64-bit CS General Purpose Registers per-engine on Gen8+ */ +#define GEN8_RING_CS_GPR(base, n) _MMIO((base) + 0x600 + (n) * 8) +#define GEN8_RING_CS_GPR_UDW(base, n) _MMIO((base) + 0x600 + (n) * 8 + 4) + #define RING_FORCE_TO_NONPRIV(base, i) _MMIO(((base) + 0x4D0) + (i) * 4) #define RING_FORCE_TO_NONPRIV_ACCESS_RW (0 << 28) /* CFL+ & Gen11+ */ #define RING_FORCE_TO_NONPRIV_ACCESS_RD (1 << 28) From a3f56e7da5231c902925711940835b6716f63f73 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 25 Sep 2019 20:34:46 +0100 Subject: [PATCH 267/331] drm/i915/selftests: Exercise concurrent submission to all engines The simplest and most maximal submission we can do, a thread to submit requests unto each engine. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190925193446.26007-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_request.c | 125 ++++++++++++++++++ 1 file changed, 125 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index b3688543ed7d0..57cd4180d06c9 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -1062,6 +1062,130 @@ static int live_sequential_engines(void *arg) return err; } +static int __live_parallel_engine1(void *arg) +{ + struct intel_engine_cs *engine = arg; + IGT_TIMEOUT(end_time); + unsigned long count; + + count = 0; + do { + struct i915_request *rq; + int err; + + mutex_lock(&engine->i915->drm.struct_mutex); + rq = i915_request_create(engine->kernel_context); + if (IS_ERR(rq)) { + mutex_unlock(&engine->i915->drm.struct_mutex); + return PTR_ERR(rq); + } + + i915_request_get(rq); + i915_request_add(rq); + mutex_unlock(&engine->i915->drm.struct_mutex); + + err = 0; + if (i915_request_wait(rq, 0, HZ / 5) < 0) + err = -ETIME; + i915_request_put(rq); + if (err) + return err; + + count++; + } while (!__igt_timeout(end_time, NULL)); + + pr_info("%s: %lu request + sync\n", engine->name, count); + return 0; +} + +static int __live_parallel_engineN(void *arg) +{ + struct intel_engine_cs *engine = arg; + IGT_TIMEOUT(end_time); + unsigned long count; + + count = 0; + do { + struct i915_request *rq; + + mutex_lock(&engine->i915->drm.struct_mutex); + rq = i915_request_create(engine->kernel_context); + if (IS_ERR(rq)) { + mutex_unlock(&engine->i915->drm.struct_mutex); + return PTR_ERR(rq); + } + + i915_request_add(rq); + mutex_unlock(&engine->i915->drm.struct_mutex); + + count++; + } while (!__igt_timeout(end_time, NULL)); + + pr_info("%s: %lu requests\n", engine->name, count); + return 0; +} + +static int live_parallel_engines(void *arg) +{ + struct drm_i915_private *i915 = arg; + static int (* const func[])(void *arg) = { + __live_parallel_engine1, + __live_parallel_engineN, + NULL, + }; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int (* const *fn)(void *arg); + int err = 0; + + /* + * Check we can submit requests to all engines concurrently. This + * tests that we load up the system maximally. + */ + + for (fn = func; !err && *fn; fn++) { + struct task_struct *tsk[I915_NUM_ENGINES] = {}; + struct igt_live_test t; + + mutex_lock(&i915->drm.struct_mutex); + err = igt_live_test_begin(&t, i915, __func__, ""); + mutex_unlock(&i915->drm.struct_mutex); + if (err) + break; + + for_each_engine(engine, i915, id) { + tsk[id] = kthread_run(*fn, engine, + "igt/parallel:%s", + engine->name); + if (IS_ERR(tsk[id])) { + err = PTR_ERR(tsk[id]); + break; + } + get_task_struct(tsk[id]); + } + + for_each_engine(engine, i915, id) { + int status; + + if (IS_ERR_OR_NULL(tsk[id])) + continue; + + status = kthread_stop(tsk[id]); + if (status && !err) + err = status; + + put_task_struct(tsk[id]); + } + + mutex_lock(&i915->drm.struct_mutex); + if (igt_live_test_end(&t)) + err = -EIO; + mutex_unlock(&i915->drm.struct_mutex); + } + + return err; +} + static int max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { @@ -1240,6 +1364,7 @@ int i915_request_live_selftests(struct drm_i915_private *i915) SUBTEST(live_nop_request), SUBTEST(live_all_engines), SUBTEST(live_sequential_engines), + SUBTEST(live_parallel_engines), SUBTEST(live_empty_request), SUBTEST(live_breadcrumbs_smoketest), }; From c113236718e89561f309705f1a78126b3df93a21 Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Fri, 27 Sep 2019 12:08:49 +0100 Subject: [PATCH 268/331] drm/i915: Extract GT render sleep (rc6) management Continuing the theme of breaking intel_pm.c up in a reasonable chunk of powermanagement utilities, pull out the rc6 setup into its GT handler. Based on a patch by Chris Wilson. Signed-off-by: Andi Shyti Cc: Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190919143840.20384-1-andi.shyti@intel.com Link: https://patchwork.freedesktop.org/patch/msgid/20190927110849.28734-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/gem/i915_gem_pm.c | 2 +- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 1 + drivers/gpu/drm/i915/gt/intel_gt.c | 5 +- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 92 +- drivers/gpu/drm/i915/gt/intel_gt_pm.h | 11 +- drivers/gpu/drm/i915/gt/intel_gt_types.h | 3 + drivers/gpu/drm/i915/gt/intel_rc6.c | 712 ++++++++++++++++ drivers/gpu/drm/i915/gt/intel_rc6.h | 25 + drivers/gpu/drm/i915/gt/intel_rc6_types.h | 28 + drivers/gpu/drm/i915/gt/selftest_gt_pm.c | 50 ++ drivers/gpu/drm/i915/i915_debugfs.c | 11 +- drivers/gpu/drm/i915/i915_drv.h | 7 - drivers/gpu/drm/i915/i915_pmu.c | 9 +- drivers/gpu/drm/i915/i915_sysfs.c | 4 +- drivers/gpu/drm/i915/intel_pm.c | 794 +----------------- drivers/gpu/drm/i915/intel_pm.h | 3 - .../drm/i915/selftests/i915_live_selftests.h | 1 + 18 files changed, 916 insertions(+), 843 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/intel_rc6.c create mode 100644 drivers/gpu/drm/i915/gt/intel_rc6.h create mode 100644 drivers/gpu/drm/i915/gt/intel_rc6_types.h create mode 100644 drivers/gpu/drm/i915/gt/selftest_gt_pm.c diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 6313e7b4bd784..8d407ff7d59dc 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -85,6 +85,7 @@ gt-y += \ gt/intel_gt_pm_irq.o \ gt/intel_hangcheck.o \ gt/intel_lrc.o \ + gt/intel_rc6.o \ gt/intel_renderstate.o \ gt/intel_reset.o \ gt/intel_ringbuffer.o \ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index a11ad4d914ca8..cca192ecff8b6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -137,7 +137,6 @@ static bool switch_to_kernel_context_sync(struct intel_gt *gt) bool i915_gem_load_power_context(struct drm_i915_private *i915) { - intel_gt_pm_enable(&i915->gt); return switch_to_kernel_context_sync(&i915->gt); } @@ -188,6 +187,7 @@ void i915_gem_suspend(struct drm_i915_private *i915) i915_gem_drain_freed_objects(i915); intel_uc_suspend(&i915->gt.uc); + intel_gt_suspend(&i915->gt); } static struct drm_i915_gem_object *first_mm_object(struct list_head *list) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index ce61c83d68e97..66fc49b76ea8d 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -11,6 +11,7 @@ #include "intel_engine_pool.h" #include "intel_gt.h" #include "intel_gt_pm.h" +#include "intel_rc6.h" static int __engine_unpark(struct intel_wakeref *wf) { diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index eef9bdae8ebb4..0e5909ee06579 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -7,6 +7,7 @@ #include "intel_gt.h" #include "intel_gt_pm.h" #include "intel_mocs.h" +#include "intel_rc6.h" #include "intel_uncore.h" #include "intel_pm.h" @@ -369,6 +370,8 @@ int intel_gt_init(struct intel_gt *gt) if (err) return err; + intel_gt_pm_init(gt); + return 0; } @@ -387,8 +390,8 @@ void intel_gt_driver_release(struct intel_gt *gt) { /* Paranoia: make sure we have disabled everything before we exit. */ intel_gt_pm_disable(gt); + intel_gt_pm_fini(gt); - intel_cleanup_gt_powersave(gt->i915); intel_gt_fini_scratch(gt); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 2ccf8cacaa0a7..42f175d9b98c4 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -11,6 +11,7 @@ #include "intel_gt.h" #include "intel_gt_pm.h" #include "intel_pm.h" +#include "intel_rc6.h" #include "intel_wakeref.h" static void pm_notify(struct intel_gt *gt, int state) @@ -90,6 +91,16 @@ void intel_gt_pm_init_early(struct intel_gt *gt) BLOCKING_INIT_NOTIFIER_HEAD(>->pm_notifications); } +void intel_gt_pm_init(struct intel_gt *gt) +{ + /* + * Enabling power-management should be "self-healing". If we cannot + * enable a feature, simply leave it disabled with a notice to the + * user. + */ + intel_rc6_init(>->rc6); +} + static bool reset_engines(struct intel_gt *gt) { if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display) @@ -124,40 +135,14 @@ void intel_gt_sanitize(struct intel_gt *gt, bool force) __intel_engine_reset(engine, false); } -static bool is_mock_device(const struct intel_gt *gt) -{ - return I915_SELFTEST_ONLY(gt->awake == -1); -} - -void intel_gt_pm_enable(struct intel_gt *gt) +void intel_gt_pm_disable(struct intel_gt *gt) { - struct intel_engine_cs *engine; - enum intel_engine_id id; - - /* Powersaving is controlled by the host when inside a VM */ - if (intel_vgpu_active(gt->i915)) - return; - - if (is_mock_device(gt)) - return; - - intel_gt_pm_get(gt); - - for_each_engine(engine, gt->i915, id) { - intel_engine_pm_get(engine); - engine->serial++; /* force kernel context reload */ - intel_engine_pm_put(engine); - } - - intel_gt_pm_put(gt); + intel_sanitize_gt_powersave(gt->i915); } -void intel_gt_pm_disable(struct intel_gt *gt) +void intel_gt_pm_fini(struct intel_gt *gt) { - if (is_mock_device(gt)) - return; - - intel_sanitize_gt_powersave(gt->i915); + intel_rc6_fini(>->rc6); } int intel_gt_resume(struct intel_gt *gt) @@ -173,6 +158,9 @@ int intel_gt_resume(struct intel_gt *gt) * allowing us to fixup the user contexts on their first pin. */ intel_gt_pm_get(gt); + intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); + intel_rc6_sanitize(>->rc6); + for_each_engine(engine, gt->i915, id) { struct intel_context *ce; @@ -197,11 +185,51 @@ int intel_gt_resume(struct intel_gt *gt) break; } } + + intel_rc6_enable(>->rc6); + intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); intel_gt_pm_put(gt); return err; } +static void wait_for_idle(struct intel_gt *gt) +{ + mutex_lock(>->i915->drm.struct_mutex); /* XXX */ + do { + if (i915_gem_wait_for_idle(gt->i915, + I915_WAIT_LOCKED, + I915_GEM_IDLE_TIMEOUT) == -ETIME) { + /* XXX hide warning from gem_eio */ + if (i915_modparams.reset) { + dev_err(gt->i915->drm.dev, + "Failed to idle engines, declaring wedged!\n"); + GEM_TRACE_DUMP(); + } + + /* + * Forcibly cancel outstanding work and leave + * the gpu quiet. + */ + intel_gt_set_wedged(gt); + } + } while (i915_retire_requests(gt->i915)); + mutex_unlock(>->i915->drm.struct_mutex); + + intel_gt_pm_wait_for_idle(gt); +} + +void intel_gt_suspend(struct intel_gt *gt) +{ + intel_wakeref_t wakeref; + + /* We expect to be idle already; but also want to be independent */ + wait_for_idle(gt); + + with_intel_runtime_pm(>->i915->runtime_pm, wakeref) + intel_rc6_disable(>->rc6); +} + void intel_gt_runtime_suspend(struct intel_gt *gt) { intel_uc_runtime_suspend(>->uc); @@ -213,3 +241,7 @@ int intel_gt_runtime_resume(struct intel_gt *gt) return intel_uc_runtime_resume(>->uc); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_gt_pm.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index d1f3e2e239374..ab794e8533561 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -43,12 +43,21 @@ static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt) } void intel_gt_pm_init_early(struct intel_gt *gt); -void intel_gt_pm_enable(struct intel_gt *gt); +void intel_gt_pm_init(struct intel_gt *gt); void intel_gt_pm_disable(struct intel_gt *gt); +void intel_gt_pm_fini(struct intel_gt *gt); void intel_gt_sanitize(struct intel_gt *gt, bool force); + int intel_gt_resume(struct intel_gt *gt); +void intel_gt_suspend(struct intel_gt *gt); + void intel_gt_runtime_suspend(struct intel_gt *gt); int intel_gt_runtime_resume(struct intel_gt *gt); +static inline bool is_mock_gt(const struct intel_gt *gt) +{ + return I915_SELFTEST_ONLY(gt->awake == -1); +} + #endif /* INTEL_GT_PM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index e64db4c13df64..7134f1319bbe9 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -18,6 +18,7 @@ #include "i915_vma.h" #include "intel_engine_types.h" #include "intel_reset_types.h" +#include "intel_rc6_types.h" #include "intel_wakeref.h" struct drm_i915_private; @@ -67,6 +68,8 @@ struct intel_gt { */ intel_wakeref_t awake; + struct intel_rc6 rc6; + struct blocking_notifier_head pm_notifications; ktime_t last_init_time; diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c new file mode 100644 index 0000000000000..d6167dd592e98 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -0,0 +1,712 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include + +#include "i915_drv.h" +#include "intel_gt.h" +#include "intel_gt_pm.h" +#include "intel_rc6.h" +#include "intel_sideband.h" + +/** + * DOC: RC6 + * + * RC6 is a special power stage which allows the GPU to enter an very + * low-voltage mode when idle, using down to 0V while at this stage. This + * stage is entered automatically when the GPU is idle when RC6 support is + * enabled, and as soon as new workload arises GPU wakes up automatically as + * well. + * + * There are different RC6 modes available in Intel GPU, which differentiate + * among each other with the latency required to enter and leave RC6 and + * voltage consumed by the GPU in different states. + * + * The combination of the following flags define which states GPU is allowed + * to enter, while RC6 is the normal RC6 state, RC6p is the deep RC6, and + * RC6pp is deepest RC6. Their support by hardware varies according to the + * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one + * which brings the most power savings; deeper states save more power, but + * require higher latency to switch to and wake up. + */ + +static struct intel_gt *rc6_to_gt(struct intel_rc6 *rc6) +{ + return container_of(rc6, struct intel_gt, rc6); +} + +static struct intel_uncore *rc6_to_uncore(struct intel_rc6 *rc) +{ + return rc6_to_gt(rc)->uncore; +} + +static struct drm_i915_private *rc6_to_i915(struct intel_rc6 *rc) +{ + return rc6_to_gt(rc)->i915; +} + +static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val) +{ + intel_uncore_write_fw(uncore, reg, val); +} + +static void gen11_rc6_enable(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* 2b: Program RC6 thresholds.*/ + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85); + set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150); + + set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ + set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ + for_each_engine(engine, rc6_to_gt(rc6)->i915, id) + set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); + + set(uncore, GUC_MAX_IDLE_COUNT, 0xA); + + set(uncore, GEN6_RC_SLEEP, 0); + + set(uncore, GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ + + /* + * 2c: Program Coarse Power Gating Policies. + * + * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we + * use instead is a more conservative estimate for the maximum time + * it takes us to service a CS interrupt and submit a new ELSP - that + * is the time which the GPU is idle waiting for the CPU to select the + * next request to execute. If the idle hysteresis is less than that + * interrupt service latency, the hardware will automatically gate + * the power well and we will then incur the wake up cost on top of + * the service latency. A similar guide from plane_state is that we + * do not want the enable hysteresis to less than the wakeup latency. + * + * igt/gem_exec_nop/sequential provides a rough estimate for the + * service latency, and puts it around 10us for Broadwell (and other + * big core) and around 40us for Broxton (and other low power cores). + * [Note that for legacy ringbuffer submission, this is less than 1us!] + * However, the wakeup latency on Broxton is closer to 100us. To be + * conservative, we have to factor in a context switch on top (due + * to ksoftirqd). + */ + set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250); + set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 250); + + /* 3a: Enable RC6 */ + set(uncore, GEN6_RC_CONTROL, + GEN6_RC_CTL_HW_ENABLE | + GEN6_RC_CTL_RC6_ENABLE | + GEN6_RC_CTL_EI_MODE(1)); + + set(uncore, GEN9_PG_ENABLE, + GEN9_RENDER_PG_ENABLE | + GEN9_MEDIA_PG_ENABLE | + GEN11_MEDIA_SAMPLER_PG_ENABLE); +} + +static void gen9_rc6_enable(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct intel_engine_cs *engine; + enum intel_engine_id id; + u32 rc6_mode; + + /* 2b: Program RC6 thresholds.*/ + if (INTEL_GEN(rc6_to_i915(rc6)) >= 10) { + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85); + set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150); + } else if (IS_SKYLAKE(rc6_to_i915(rc6))) { + /* + * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only + * when CPG is enabled + */ + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16); + } else { + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16); + } + + set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ + set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ + for_each_engine(engine, rc6_to_gt(rc6)->i915, id) + set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); + + set(uncore, GUC_MAX_IDLE_COUNT, 0xA); + + set(uncore, GEN6_RC_SLEEP, 0); + + /* + * 2c: Program Coarse Power Gating Policies. + * + * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we + * use instead is a more conservative estimate for the maximum time + * it takes us to service a CS interrupt and submit a new ELSP - that + * is the time which the GPU is idle waiting for the CPU to select the + * next request to execute. If the idle hysteresis is less than that + * interrupt service latency, the hardware will automatically gate + * the power well and we will then incur the wake up cost on top of + * the service latency. A similar guide from plane_state is that we + * do not want the enable hysteresis to less than the wakeup latency. + * + * igt/gem_exec_nop/sequential provides a rough estimate for the + * service latency, and puts it around 10us for Broadwell (and other + * big core) and around 40us for Broxton (and other low power cores). + * [Note that for legacy ringbuffer submission, this is less than 1us!] + * However, the wakeup latency on Broxton is closer to 100us. To be + * conservative, we have to factor in a context switch on top (due + * to ksoftirqd). + */ + set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250); + set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 250); + + /* 3a: Enable RC6 */ + set(uncore, GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ + + /* WaRsUseTimeoutMode:cnl (pre-prod) */ + if (IS_CNL_REVID(rc6_to_i915(rc6), CNL_REVID_A0, CNL_REVID_C0)) + rc6_mode = GEN7_RC_CTL_TO_MODE; + else + rc6_mode = GEN6_RC_CTL_EI_MODE(1); + + set(uncore, GEN6_RC_CONTROL, + GEN6_RC_CTL_HW_ENABLE | + GEN6_RC_CTL_RC6_ENABLE | + rc6_mode); + + set(uncore, GEN9_PG_ENABLE, + GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE); +} + +static void gen8_rc6_enable(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* 2b: Program RC6 thresholds.*/ + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); + set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ + set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ + for_each_engine(engine, rc6_to_gt(rc6)->i915, id) + set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); + set(uncore, GEN6_RC_SLEEP, 0); + set(uncore, GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ + + /* 3: Enable RC6 */ + set(uncore, GEN6_RC_CONTROL, + GEN6_RC_CTL_HW_ENABLE | + GEN7_RC_CTL_TO_MODE | + GEN6_RC_CTL_RC6_ENABLE); +} + +static void gen6_rc6_enable(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct drm_i915_private *i915 = rc6_to_i915(rc6); + struct intel_engine_cs *engine; + enum intel_engine_id id; + u32 rc6vids, rc6_mask; + int ret; + + set(uncore, GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16); + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30); + set(uncore, GEN6_RC6pp_WAKE_RATE_LIMIT, 30); + set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); + set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); + + for_each_engine(engine, i915, id) + set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); + + set(uncore, GEN6_RC_SLEEP, 0); + set(uncore, GEN6_RC1e_THRESHOLD, 1000); + if (IS_IVYBRIDGE(i915)) + set(uncore, GEN6_RC6_THRESHOLD, 125000); + else + set(uncore, GEN6_RC6_THRESHOLD, 50000); + set(uncore, GEN6_RC6p_THRESHOLD, 150000); + set(uncore, GEN6_RC6pp_THRESHOLD, 64000); /* unused */ + + /* We don't use those on Haswell */ + rc6_mask = GEN6_RC_CTL_RC6_ENABLE; + if (HAS_RC6p(i915)) + rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE; + if (HAS_RC6pp(i915)) + rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE; + set(uncore, GEN6_RC_CONTROL, + rc6_mask | + GEN6_RC_CTL_EI_MODE(1) | + GEN6_RC_CTL_HW_ENABLE); + + rc6vids = 0; + ret = sandybridge_pcode_read(i915, GEN6_PCODE_READ_RC6VIDS, + &rc6vids, NULL); + if (IS_GEN(i915, 6) && ret) { + DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n"); + } else if (IS_GEN(i915, 6) && + (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) { + DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n", + GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450); + rc6vids &= 0xffff00; + rc6vids |= GEN6_ENCODE_RC6_VID(450); + ret = sandybridge_pcode_write(i915, GEN6_PCODE_WRITE_RC6VIDS, rc6vids); + if (ret) + DRM_ERROR("Couldn't fix incorrect rc6 voltage\n"); + } +} + +/* Check that the pcbr address is not empty. */ +static int chv_rc6_init(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + resource_size_t pctx_paddr, paddr; + resource_size_t pctx_size = 32 * SZ_1K; + u32 pcbr; + + pcbr = intel_uncore_read(uncore, VLV_PCBR); + if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) { + DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n"); + paddr = rc6_to_i915(rc6)->dsm.end + 1 - pctx_size; + GEM_BUG_ON(paddr > U32_MAX); + + pctx_paddr = (paddr & ~4095); + intel_uncore_write(uncore, VLV_PCBR, pctx_paddr); + } + + return 0; +} + +static int vlv_rc6_init(struct intel_rc6 *rc6) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct drm_i915_gem_object *pctx; + resource_size_t pctx_paddr; + resource_size_t pctx_size = 24 * SZ_1K; + u32 pcbr; + + pcbr = intel_uncore_read(uncore, VLV_PCBR); + if (pcbr) { + /* BIOS set it up already, grab the pre-alloc'd space */ + resource_size_t pcbr_offset; + + pcbr_offset = (pcbr & ~4095) - i915->dsm.start; + pctx = i915_gem_object_create_stolen_for_preallocated(i915, + pcbr_offset, + I915_GTT_OFFSET_NONE, + pctx_size); + if (!pctx) + return -ENOMEM; + + goto out; + } + + DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n"); + + /* + * From the Gunit register HAS: + * The Gfx driver is expected to program this register and ensure + * proper allocation within Gfx stolen memory. For example, this + * register should be programmed such than the PCBR range does not + * overlap with other ranges, such as the frame buffer, protected + * memory, or any other relevant ranges. + */ + pctx = i915_gem_object_create_stolen(i915, pctx_size); + if (!pctx) { + DRM_DEBUG("not enough stolen space for PCTX, disabling\n"); + return -ENOMEM; + } + + GEM_BUG_ON(range_overflows_t(u64, + i915->dsm.start, + pctx->stolen->start, + U32_MAX)); + pctx_paddr = i915->dsm.start + pctx->stolen->start; + intel_uncore_write(uncore, VLV_PCBR, pctx_paddr); + +out: + rc6->pctx = pctx; + return 0; +} + +static void chv_rc6_enable(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* 2a: Program RC6 thresholds.*/ + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); + set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ + set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ + + for_each_engine(engine, rc6_to_gt(rc6)->i915, id) + set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); + set(uncore, GEN6_RC_SLEEP, 0); + + /* TO threshold set to 500 us (0x186 * 1.28 us) */ + set(uncore, GEN6_RC6_THRESHOLD, 0x186); + + /* Allows RC6 residency counter to work */ + set(uncore, VLV_COUNTER_CONTROL, + _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | + VLV_MEDIA_RC6_COUNT_EN | + VLV_RENDER_RC6_COUNT_EN)); + + /* 3: Enable RC6 */ + set(uncore, GEN6_RC_CONTROL, GEN7_RC_CTL_TO_MODE); +} + +static void vlv_rc6_enable(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000); + set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); + set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); + + for_each_engine(engine, rc6_to_gt(rc6)->i915, id) + set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); + + set(uncore, GEN6_RC6_THRESHOLD, 0x557); + + /* Allows RC6 residency counter to work */ + set(uncore, VLV_COUNTER_CONTROL, + _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | + VLV_MEDIA_RC0_COUNT_EN | + VLV_RENDER_RC0_COUNT_EN | + VLV_MEDIA_RC6_COUNT_EN | + VLV_RENDER_RC6_COUNT_EN)); + + set(uncore, GEN6_RC_CONTROL, + GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL); +} + +static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct drm_i915_private *i915 = rc6_to_i915(rc6); + u32 rc6_ctx_base, rc_ctl, rc_sw_target; + bool enable_rc6 = true; + + rc_ctl = intel_uncore_read(uncore, GEN6_RC_CONTROL); + rc_sw_target = intel_uncore_read(uncore, GEN6_RC_STATE); + rc_sw_target &= RC_SW_TARGET_STATE_MASK; + rc_sw_target >>= RC_SW_TARGET_STATE_SHIFT; + DRM_DEBUG_DRIVER("BIOS enabled RC states: " + "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n", + onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE), + onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE), + rc_sw_target); + + if (!(intel_uncore_read(uncore, RC6_LOCATION) & RC6_CTX_IN_DRAM)) { + DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n"); + enable_rc6 = false; + } + + /* + * The exact context size is not known for BXT, so assume a page size + * for this check. + */ + rc6_ctx_base = + intel_uncore_read(uncore, RC6_CTX_BASE) & RC6_CTX_BASE_MASK; + if (!(rc6_ctx_base >= i915->dsm_reserved.start && + rc6_ctx_base + PAGE_SIZE < i915->dsm_reserved.end)) { + DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n"); + enable_rc6 = false; + } + + if (!((intel_uncore_read(uncore, PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1 && + (intel_uncore_read(uncore, PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1 && + (intel_uncore_read(uncore, PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1 && + (intel_uncore_read(uncore, PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1)) { + DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n"); + enable_rc6 = false; + } + + if (!intel_uncore_read(uncore, GEN8_PUSHBUS_CONTROL) || + !intel_uncore_read(uncore, GEN8_PUSHBUS_ENABLE) || + !intel_uncore_read(uncore, GEN8_PUSHBUS_SHIFT)) { + DRM_DEBUG_DRIVER("Pushbus not setup properly.\n"); + enable_rc6 = false; + } + + if (!intel_uncore_read(uncore, GEN6_GFXPAUSE)) { + DRM_DEBUG_DRIVER("GFX pause not setup properly.\n"); + enable_rc6 = false; + } + + if (!intel_uncore_read(uncore, GEN8_MISC_CTRL0)) { + DRM_DEBUG_DRIVER("GPM control not setup properly.\n"); + enable_rc6 = false; + } + + return enable_rc6; +} + +static bool rc6_supported(struct intel_rc6 *rc6) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + + if (!HAS_RC6(i915)) + return false; + + if (intel_vgpu_active(i915)) + return false; + + if (is_mock_gt(rc6_to_gt(rc6))) + return false; + + if (IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(rc6)) { + dev_notice(i915->drm.dev, + "RC6 and powersaving disabled by BIOS\n"); + return false; + } + + return true; +} + +static void rpm_get(struct intel_rc6 *rc6) +{ + GEM_BUG_ON(rc6->wakeref); + pm_runtime_get_sync(&rc6_to_i915(rc6)->drm.pdev->dev); + rc6->wakeref = true; +} + +static void rpm_put(struct intel_rc6 *rc6) +{ + GEM_BUG_ON(!rc6->wakeref); + pm_runtime_put(&rc6_to_i915(rc6)->drm.pdev->dev); + rc6->wakeref = false; +} + +static void __intel_rc6_disable(struct intel_rc6 *rc6) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + struct intel_uncore *uncore = rc6_to_uncore(rc6); + + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + if (INTEL_GEN(i915) >= 9) + set(uncore, GEN9_PG_ENABLE, 0); + set(uncore, GEN6_RC_CONTROL, 0); + set(uncore, GEN6_RC_STATE, 0); + intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); +} + +void intel_rc6_init(struct intel_rc6 *rc6) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + int err; + + /* Disable runtime-pm until we can save the GPU state with rc6 pctx */ + rpm_get(rc6); + + if (!rc6_supported(rc6)) + return; + + if (IS_CHERRYVIEW(i915)) + err = chv_rc6_init(rc6); + else if (IS_VALLEYVIEW(i915)) + err = vlv_rc6_init(rc6); + else + err = 0; + + /* Sanitize rc6, ensure it is disabled before we are ready. */ + __intel_rc6_disable(rc6); + + rc6->supported = err == 0; +} + +void intel_rc6_sanitize(struct intel_rc6 *rc6) +{ + if (rc6->supported) + __intel_rc6_disable(rc6); +} + +void intel_rc6_enable(struct intel_rc6 *rc6) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + struct intel_uncore *uncore = rc6_to_uncore(rc6); + + if (!rc6->supported) + return; + + GEM_BUG_ON(rc6->enabled); + + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + + if (IS_CHERRYVIEW(i915)) + chv_rc6_enable(rc6); + else if (IS_VALLEYVIEW(i915)) + vlv_rc6_enable(rc6); + else if (INTEL_GEN(i915) >= 11) + gen11_rc6_enable(rc6); + else if (INTEL_GEN(i915) >= 9) + gen9_rc6_enable(rc6); + else if (IS_BROADWELL(i915)) + gen8_rc6_enable(rc6); + else if (INTEL_GEN(i915) >= 6) + gen6_rc6_enable(rc6); + + intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); + + /* rc6 is ready, runtime-pm is go! */ + rpm_put(rc6); + rc6->enabled = true; +} + +void intel_rc6_disable(struct intel_rc6 *rc6) +{ + if (!rc6->enabled) + return; + + rpm_get(rc6); + rc6->enabled = false; + + __intel_rc6_disable(rc6); +} + +void intel_rc6_fini(struct intel_rc6 *rc6) +{ + struct drm_i915_gem_object *pctx; + + intel_rc6_disable(rc6); + + pctx = fetch_and_zero(&rc6->pctx); + if (pctx) + i915_gem_object_put(pctx); + + if (rc6->wakeref) + rpm_put(rc6); +} + +static u64 vlv_residency_raw(struct intel_uncore *uncore, const i915_reg_t reg) +{ + u32 lower, upper, tmp; + int loop = 2; + + /* + * The register accessed do not need forcewake. We borrow + * uncore lock to prevent concurrent access to range reg. + */ + lockdep_assert_held(&uncore->lock); + + /* + * vlv and chv residency counters are 40 bits in width. + * With a control bit, we can choose between upper or lower + * 32bit window into this counter. + * + * Although we always use the counter in high-range mode elsewhere, + * userspace may attempt to read the value before rc6 is initialised, + * before we have set the default VLV_COUNTER_CONTROL value. So always + * set the high bit to be safe. + */ + set(uncore, VLV_COUNTER_CONTROL, + _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH)); + upper = intel_uncore_read_fw(uncore, reg); + do { + tmp = upper; + + set(uncore, VLV_COUNTER_CONTROL, + _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH)); + lower = intel_uncore_read_fw(uncore, reg); + + set(uncore, VLV_COUNTER_CONTROL, + _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH)); + upper = intel_uncore_read_fw(uncore, reg); + } while (upper != tmp && --loop); + + /* + * Everywhere else we always use VLV_COUNTER_CONTROL with the + * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set + * now. + */ + + return lower | (u64)upper << 8; +} + +u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, const i915_reg_t reg) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + struct intel_uncore *uncore = rc6_to_uncore(rc6); + u64 time_hw, prev_hw, overflow_hw; + unsigned int fw_domains; + unsigned long flags; + unsigned int i; + u32 mul, div; + + if (!rc6->supported) + return 0; + + /* + * Store previous hw counter values for counter wrap-around handling. + * + * There are only four interesting registers and they live next to each + * other so we can use the relative address, compared to the smallest + * one as the index into driver storage. + */ + i = (i915_mmio_reg_offset(reg) - + i915_mmio_reg_offset(GEN6_GT_GFX_RC6_LOCKED)) / sizeof(u32); + if (WARN_ON_ONCE(i >= ARRAY_SIZE(rc6->cur_residency))) + return 0; + + fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, FW_REG_READ); + + spin_lock_irqsave(&uncore->lock, flags); + intel_uncore_forcewake_get__locked(uncore, fw_domains); + + /* On VLV and CHV, residency time is in CZ units rather than 1.28us */ + if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { + mul = 1000000; + div = i915->czclk_freq; + overflow_hw = BIT_ULL(40); + time_hw = vlv_residency_raw(uncore, reg); + } else { + /* 833.33ns units on Gen9LP, 1.28us elsewhere. */ + if (IS_GEN9_LP(i915)) { + mul = 10000; + div = 12; + } else { + mul = 1280; + div = 1; + } + + overflow_hw = BIT_ULL(32); + time_hw = intel_uncore_read_fw(uncore, reg); + } + + /* + * Counter wrap handling. + * + * But relying on a sufficient frequency of queries otherwise counters + * can still wrap. + */ + prev_hw = rc6->prev_hw_residency[i]; + rc6->prev_hw_residency[i] = time_hw; + + /* RC6 delta from last sample. */ + if (time_hw >= prev_hw) + time_hw -= prev_hw; + else + time_hw += overflow_hw - prev_hw; + + /* Add delta to RC6 extended raw driver copy. */ + time_hw += rc6->cur_residency[i]; + rc6->cur_residency[i] = time_hw; + + intel_uncore_forcewake_put__locked(uncore, fw_domains); + spin_unlock_irqrestore(&uncore->lock, flags); + + return mul_u64_u32_div(time_hw, mul, div); +} + +u64 intel_rc6_residency_us(struct intel_rc6 *rc6, i915_reg_t reg) +{ + return DIV_ROUND_UP_ULL(intel_rc6_residency_ns(rc6, reg), 1000); +} diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.h b/drivers/gpu/drm/i915/gt/intel_rc6.h new file mode 100644 index 0000000000000..5e6711f364575 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_rc6.h @@ -0,0 +1,25 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_RC6_H +#define INTEL_RC6_H + +#include "i915_reg.h" + +struct intel_engine_cs; +struct intel_rc6; + +void intel_rc6_init(struct intel_rc6 *rc6); +void intel_rc6_fini(struct intel_rc6 *rc6); + +void intel_rc6_sanitize(struct intel_rc6 *rc6); +void intel_rc6_enable(struct intel_rc6 *rc6); +void intel_rc6_disable(struct intel_rc6 *rc6); + +u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, i915_reg_t reg); +u64 intel_rc6_residency_us(struct intel_rc6 *rc6, i915_reg_t reg); + +#endif /* INTEL_RC6_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_rc6_types.h b/drivers/gpu/drm/i915/gt/intel_rc6_types.h new file mode 100644 index 0000000000000..214f354d6ae45 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_rc6_types.h @@ -0,0 +1,28 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_RC6_TYPES_H +#define INTEL_RC6_TYPES_H + +#include +#include + +#include "intel_engine_types.h" + +struct drm_i915_gem_object; + +struct intel_rc6 { + u64 prev_hw_residency[4]; + u64 cur_residency[4]; + + struct drm_i915_gem_object *pctx; + + bool supported : 1; + bool enabled : 1; + bool wakeref : 1; +}; + +#endif /* INTEL_RC6_TYPES_H */ diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c new file mode 100644 index 0000000000000..87985bd464231 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c @@ -0,0 +1,50 @@ + +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +static int live_gt_resume(void *arg) +{ + struct intel_gt *gt = arg; + IGT_TIMEOUT(end_time); + int err; + + /* Do several suspend/resume cycles to check we don't explode! */ + do { + intel_gt_suspend(gt); + + if (gt->rc6.enabled) { + pr_err("rc6 still enabled after suspend!\n"); + intel_gt_set_wedged_on_init(gt); + err = -EINVAL; + break; + } + + err = intel_gt_resume(gt); + if (err) + break; + + if (gt->rc6.supported && !gt->rc6.enabled) { + pr_err("rc6 not enabled upon resume!\n"); + intel_gt_set_wedged_on_init(gt); + err = -EINVAL; + break; + } + } while (!__igt_timeout(end_time, NULL)); + + return err; +} + +int intel_gt_pm_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_gt_resume), + }; + + if (intel_gt_is_wedged(&i915->gt)) + return 0; + + return intel_gt_live_subtests(tests, &i915->gt); +} diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index b5b449a88cf1d..fec9fb7cc384b 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -42,6 +42,7 @@ #include "gem/i915_gem_context.h" #include "gt/intel_gt_pm.h" #include "gt/intel_reset.h" +#include "gt/intel_rc6.h" #include "gt/uc/intel_guc_submission.h" #include "i915_debugfs.h" @@ -1168,11 +1169,13 @@ static void print_rc6_res(struct seq_file *m, const char *title, const i915_reg_t reg) { - struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_i915_private *i915 = node_to_i915(m->private); + intel_wakeref_t wakeref; - seq_printf(m, "%s %u (%llu us)\n", - title, I915_READ(reg), - intel_rc6_residency_us(dev_priv, reg)); + with_intel_runtime_pm(&i915->runtime_pm, wakeref) + seq_printf(m, "%s %u (%llu us)\n", title, + intel_uncore_read(&i915->uncore, reg), + intel_rc6_residency_us(&i915->gt.rc6, reg)); } static int vlv_drpc_info(struct seq_file *m) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b3c7dbc1832a0..0e4e77373ee56 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -599,19 +599,12 @@ struct intel_rps { struct intel_rps_ei ei; }; -struct intel_rc6 { - bool enabled; - u64 prev_hw_residency[4]; - u64 cur_residency[4]; -}; - struct intel_llc_pstate { bool enabled; }; struct intel_gen6_power_mgmt { struct intel_rps rps; - struct intel_rc6 rc6; struct intel_llc_pstate llc_pstate; }; diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 3310353890fb6..d0508719492e3 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -11,6 +11,7 @@ #include "gt/intel_engine_pm.h" #include "gt/intel_engine_user.h" #include "gt/intel_gt_pm.h" +#include "gt/intel_rc6.h" #include "i915_drv.h" #include "i915_pmu.h" @@ -116,21 +117,21 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active) return enable; } -static u64 __get_rc6(const struct intel_gt *gt) +static u64 __get_rc6(struct intel_gt *gt) { struct drm_i915_private *i915 = gt->i915; u64 val; - val = intel_rc6_residency_ns(i915, + val = intel_rc6_residency_ns(>->rc6, IS_VALLEYVIEW(i915) ? VLV_GT_RENDER_RC6 : GEN6_GT_GFX_RC6); if (HAS_RC6p(i915)) - val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p); + val += intel_rc6_residency_ns(>->rc6, GEN6_GT_GFX_RC6p); if (HAS_RC6pp(i915)) - val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp); + val += intel_rc6_residency_ns(>->rc6, GEN6_GT_GFX_RC6pp); return val; } diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index d8a3b180c0847..034b8abc50624 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -30,6 +30,8 @@ #include #include +#include "gt/intel_rc6.h" + #include "i915_drv.h" #include "i915_sysfs.h" #include "intel_pm.h" @@ -49,7 +51,7 @@ static u32 calc_residency(struct drm_i915_private *dev_priv, u64 res = 0; with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) - res = intel_rc6_residency_us(dev_priv, reg); + res = intel_rc6_residency_us(&dev_priv->gt.rc6, reg); return DIV_ROUND_CLOSEST_ULL(res, 1000); } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 6bed2ed145749..bfcf03ab5245d 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -45,26 +45,6 @@ #include "intel_sideband.h" #include "../../../platform/x86/intel_ips.h" -/** - * DOC: RC6 - * - * RC6 is a special power stage which allows the GPU to enter an very - * low-voltage mode when idle, using down to 0V while at this stage. This - * stage is entered automatically when the GPU is idle when RC6 support is - * enabled, and as soon as new workload arises GPU wakes up automatically as well. - * - * There are different RC6 modes available in Intel GPU, which differentiate - * among each other with the latency required to enter and leave RC6 and - * voltage consumed by the GPU in different states. - * - * The combination of the following flags define which states GPU is allowed - * to enter, while RC6 is the normal RC6 state, RC6p is the deep RC6, and - * RC6pp is deepest RC6. Their support by hardware varies according to the - * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one - * which brings the most power savings; deeper states save more power, but - * require higher latency to switch to and wake up. - */ - static void gen9_init_clock_gating(struct drm_i915_private *dev_priv) { if (HAS_LLC(dev_priv)) { @@ -6918,142 +6898,27 @@ int intel_set_rps(struct drm_i915_private *dev_priv, u8 val) return err; } -static void gen9_disable_rc6(struct drm_i915_private *dev_priv) -{ - I915_WRITE(GEN6_RC_CONTROL, 0); - I915_WRITE(GEN9_PG_ENABLE, 0); -} - static void gen9_disable_rps(struct drm_i915_private *dev_priv) { I915_WRITE(GEN6_RP_CONTROL, 0); } -static void gen6_disable_rc6(struct drm_i915_private *dev_priv) -{ - I915_WRITE(GEN6_RC_CONTROL, 0); -} - static void gen6_disable_rps(struct drm_i915_private *dev_priv) { I915_WRITE(GEN6_RPNSWREQ, 1 << 31); I915_WRITE(GEN6_RP_CONTROL, 0); } -static void cherryview_disable_rc6(struct drm_i915_private *dev_priv) -{ - I915_WRITE(GEN6_RC_CONTROL, 0); -} - static void cherryview_disable_rps(struct drm_i915_private *dev_priv) { I915_WRITE(GEN6_RP_CONTROL, 0); } -static void valleyview_disable_rc6(struct drm_i915_private *dev_priv) -{ - /* We're doing forcewake before Disabling RC6, - * This what the BIOS expects when going into suspend */ - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - I915_WRITE(GEN6_RC_CONTROL, 0); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - static void valleyview_disable_rps(struct drm_i915_private *dev_priv) { I915_WRITE(GEN6_RP_CONTROL, 0); } -static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv) -{ - bool enable_rc6 = true; - unsigned long rc6_ctx_base; - u32 rc_ctl; - int rc_sw_target; - - rc_ctl = I915_READ(GEN6_RC_CONTROL); - rc_sw_target = (I915_READ(GEN6_RC_STATE) & RC_SW_TARGET_STATE_MASK) >> - RC_SW_TARGET_STATE_SHIFT; - DRM_DEBUG_DRIVER("BIOS enabled RC states: " - "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n", - onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE), - onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE), - rc_sw_target); - - if (!(I915_READ(RC6_LOCATION) & RC6_CTX_IN_DRAM)) { - DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n"); - enable_rc6 = false; - } - - /* - * The exact context size is not known for BXT, so assume a page size - * for this check. - */ - rc6_ctx_base = I915_READ(RC6_CTX_BASE) & RC6_CTX_BASE_MASK; - if (!((rc6_ctx_base >= dev_priv->dsm_reserved.start) && - (rc6_ctx_base + PAGE_SIZE < dev_priv->dsm_reserved.end))) { - DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n"); - enable_rc6 = false; - } - - if (!(((I915_READ(PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1) && - ((I915_READ(PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1) && - ((I915_READ(PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1) && - ((I915_READ(PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1))) { - DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n"); - enable_rc6 = false; - } - - if (!I915_READ(GEN8_PUSHBUS_CONTROL) || - !I915_READ(GEN8_PUSHBUS_ENABLE) || - !I915_READ(GEN8_PUSHBUS_SHIFT)) { - DRM_DEBUG_DRIVER("Pushbus not setup properly.\n"); - enable_rc6 = false; - } - - if (!I915_READ(GEN6_GFXPAUSE)) { - DRM_DEBUG_DRIVER("GFX pause not setup properly.\n"); - enable_rc6 = false; - } - - if (!I915_READ(GEN8_MISC_CTRL0)) { - DRM_DEBUG_DRIVER("GPM control not setup properly.\n"); - enable_rc6 = false; - } - - return enable_rc6; -} - -static bool sanitize_rc6(struct drm_i915_private *i915) -{ - struct intel_device_info *info = mkwrite_device_info(i915); - - /* Powersaving is controlled by the host when inside a VM */ - if (intel_vgpu_active(i915)) { - info->has_rc6 = 0; - info->has_rps = false; - } - - if (info->has_rc6 && - IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(i915)) { - DRM_INFO("RC6 disabled by BIOS\n"); - info->has_rc6 = 0; - } - - /* - * We assume that we do not have any deep rc6 levels if we don't have - * have the previous rc6 level supported, i.e. we use HAS_RC6() - * as the initial coarse check for rc6 in general, moving on to - * progressively finer/deeper levels. - */ - if (!info->has_rc6 && info->has_rc6p) - info->has_rc6p = 0; - - return info->has_rc6; -} - static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) { struct intel_rps *rps = &dev_priv->gt_pm.rps; @@ -7140,203 +7005,6 @@ static void gen9_enable_rps(struct drm_i915_private *dev_priv) intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); } -static void gen11_enable_rc6(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - /* 1a: Software RC state - RC0 */ - I915_WRITE(GEN6_RC_STATE, 0); - - /* - * 1b: Get forcewake during program sequence. Although the driver - * hasn't enabled a state yet where we need forcewake, BIOS may have. - */ - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - /* 2a: Disable RC states. */ - I915_WRITE(GEN6_RC_CONTROL, 0); - - /* 2b: Program RC6 thresholds.*/ - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85); - I915_WRITE(GEN10_MEDIA_WAKE_RATE_LIMIT, 150); - - I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ - I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ - for_each_engine(engine, dev_priv, id) - I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); - - if (HAS_GT_UC(dev_priv)) - I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA); - - I915_WRITE(GEN6_RC_SLEEP, 0); - - I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ - - /* - * 2c: Program Coarse Power Gating Policies. - * - * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we - * use instead is a more conservative estimate for the maximum time - * it takes us to service a CS interrupt and submit a new ELSP - that - * is the time which the GPU is idle waiting for the CPU to select the - * next request to execute. If the idle hysteresis is less than that - * interrupt service latency, the hardware will automatically gate - * the power well and we will then incur the wake up cost on top of - * the service latency. A similar guide from plane_state is that we - * do not want the enable hysteresis to less than the wakeup latency. - * - * igt/gem_exec_nop/sequential provides a rough estimate for the - * service latency, and puts it around 10us for Broadwell (and other - * big core) and around 40us for Broxton (and other low power cores). - * [Note that for legacy ringbuffer submission, this is less than 1us!] - * However, the wakeup latency on Broxton is closer to 100us. To be - * conservative, we have to factor in a context switch on top (due - * to ksoftirqd). - */ - I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250); - I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 250); - - /* 3a: Enable RC6 */ - I915_WRITE(GEN6_RC_CONTROL, - GEN6_RC_CTL_HW_ENABLE | - GEN6_RC_CTL_RC6_ENABLE | - GEN6_RC_CTL_EI_MODE(1)); - - /* 3b: Enable Coarse Power Gating only when RC6 is enabled. */ - I915_WRITE(GEN9_PG_ENABLE, - GEN9_RENDER_PG_ENABLE | - GEN9_MEDIA_PG_ENABLE | - GEN11_MEDIA_SAMPLER_PG_ENABLE); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - -static void gen9_enable_rc6(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - u32 rc6_mode; - - /* 1a: Software RC state - RC0 */ - I915_WRITE(GEN6_RC_STATE, 0); - - /* 1b: Get forcewake during program sequence. Although the driver - * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - /* 2a: Disable RC states. */ - I915_WRITE(GEN6_RC_CONTROL, 0); - - /* 2b: Program RC6 thresholds.*/ - if (INTEL_GEN(dev_priv) >= 10) { - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85); - I915_WRITE(GEN10_MEDIA_WAKE_RATE_LIMIT, 150); - } else if (IS_SKYLAKE(dev_priv)) { - /* - * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only - * when CPG is enabled - */ - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16); - } else { - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16); - } - - I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ - I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ - for_each_engine(engine, dev_priv, id) - I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); - - if (HAS_GT_UC(dev_priv)) - I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA); - - I915_WRITE(GEN6_RC_SLEEP, 0); - - /* - * 2c: Program Coarse Power Gating Policies. - * - * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we - * use instead is a more conservative estimate for the maximum time - * it takes us to service a CS interrupt and submit a new ELSP - that - * is the time which the GPU is idle waiting for the CPU to select the - * next request to execute. If the idle hysteresis is less than that - * interrupt service latency, the hardware will automatically gate - * the power well and we will then incur the wake up cost on top of - * the service latency. A similar guide from plane_state is that we - * do not want the enable hysteresis to less than the wakeup latency. - * - * igt/gem_exec_nop/sequential provides a rough estimate for the - * service latency, and puts it around 10us for Broadwell (and other - * big core) and around 40us for Broxton (and other low power cores). - * [Note that for legacy ringbuffer submission, this is less than 1us!] - * However, the wakeup latency on Broxton is closer to 100us. To be - * conservative, we have to factor in a context switch on top (due - * to ksoftirqd). - */ - I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250); - I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 250); - - /* 3a: Enable RC6 */ - I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ - - /* WaRsUseTimeoutMode:cnl (pre-prod) */ - if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_C0)) - rc6_mode = GEN7_RC_CTL_TO_MODE; - else - rc6_mode = GEN6_RC_CTL_EI_MODE(1); - - I915_WRITE(GEN6_RC_CONTROL, - GEN6_RC_CTL_HW_ENABLE | - GEN6_RC_CTL_RC6_ENABLE | - rc6_mode); - - /* - * 3b: Enable Coarse Power Gating only when RC6 is enabled. - * WaRsDisableCoarsePowerGating:skl,cnl - Render/Media PG need to be disabled with RC6. - */ - if (NEEDS_WaRsDisableCoarsePowerGating(dev_priv)) - I915_WRITE(GEN9_PG_ENABLE, 0); - else - I915_WRITE(GEN9_PG_ENABLE, - GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - -static void gen8_enable_rc6(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - /* 1a: Software RC state - RC0 */ - I915_WRITE(GEN6_RC_STATE, 0); - - /* 1b: Get forcewake during program sequence. Although the driver - * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - /* 2a: Disable RC states. */ - I915_WRITE(GEN6_RC_CONTROL, 0); - - /* 2b: Program RC6 thresholds.*/ - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); - I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ - I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ - for_each_engine(engine, dev_priv, id) - I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); - I915_WRITE(GEN6_RC_SLEEP, 0); - I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ - - /* 3: Enable RC6 */ - - I915_WRITE(GEN6_RC_CONTROL, - GEN6_RC_CTL_HW_ENABLE | - GEN7_RC_CTL_TO_MODE | - GEN6_RC_CTL_RC6_ENABLE); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - static void gen8_enable_rps(struct drm_i915_private *dev_priv) { struct intel_rps *rps = &dev_priv->gt_pm.rps; @@ -7377,75 +7045,6 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv) intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); } -static void gen6_enable_rc6(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - u32 rc6vids, rc6_mask; - u32 gtfifodbg; - int ret; - - I915_WRITE(GEN6_RC_STATE, 0); - - /* Clear the DBG now so we don't confuse earlier errors */ - gtfifodbg = I915_READ(GTFIFODBG); - if (gtfifodbg) { - DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg); - I915_WRITE(GTFIFODBG, gtfifodbg); - } - - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - /* disable the counters and set deterministic thresholds */ - I915_WRITE(GEN6_RC_CONTROL, 0); - - I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16); - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30); - I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30); - I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); - I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); - - for_each_engine(engine, dev_priv, id) - I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); - - I915_WRITE(GEN6_RC_SLEEP, 0); - I915_WRITE(GEN6_RC1e_THRESHOLD, 1000); - if (IS_IVYBRIDGE(dev_priv)) - I915_WRITE(GEN6_RC6_THRESHOLD, 125000); - else - I915_WRITE(GEN6_RC6_THRESHOLD, 50000); - I915_WRITE(GEN6_RC6p_THRESHOLD, 150000); - I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */ - - /* We don't use those on Haswell */ - rc6_mask = GEN6_RC_CTL_RC6_ENABLE; - if (HAS_RC6p(dev_priv)) - rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE; - if (HAS_RC6pp(dev_priv)) - rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE; - I915_WRITE(GEN6_RC_CONTROL, - rc6_mask | - GEN6_RC_CTL_EI_MODE(1) | - GEN6_RC_CTL_HW_ENABLE); - - rc6vids = 0; - ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, - &rc6vids, NULL); - if (IS_GEN(dev_priv, 6) && ret) { - DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n"); - } else if (IS_GEN(dev_priv, 6) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) { - DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n", - GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450); - rc6vids &= 0xffff00; - rc6vids |= GEN6_ENCODE_RC6_VID(450); - ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids); - if (ret) - DRM_ERROR("Couldn't fix incorrect rc6 voltage\n"); - } - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - static void gen6_enable_rps(struct drm_i915_private *dev_priv) { /* Here begins a magic sequence of register writes to enable @@ -7662,100 +7261,6 @@ static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv) return max_t(u32, val, 0xc0); } -/* Check that the pctx buffer wasn't move under us. */ -static void valleyview_check_pctx(struct drm_i915_private *dev_priv) -{ - unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095; - - WARN_ON(pctx_addr != dev_priv->dsm.start + - dev_priv->vlv_pctx->stolen->start); -} - - -/* Check that the pcbr address is not empty. */ -static void cherryview_check_pctx(struct drm_i915_private *dev_priv) -{ - unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095; - - WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0); -} - -static void cherryview_setup_pctx(struct drm_i915_private *dev_priv) -{ - resource_size_t pctx_paddr, paddr; - resource_size_t pctx_size = 32*1024; - u32 pcbr; - - pcbr = I915_READ(VLV_PCBR); - if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) { - DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n"); - paddr = dev_priv->dsm.end + 1 - pctx_size; - GEM_BUG_ON(paddr > U32_MAX); - - pctx_paddr = (paddr & (~4095)); - I915_WRITE(VLV_PCBR, pctx_paddr); - } - - DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR)); -} - -static void valleyview_setup_pctx(struct drm_i915_private *dev_priv) -{ - struct drm_i915_gem_object *pctx; - resource_size_t pctx_paddr; - resource_size_t pctx_size = 24*1024; - u32 pcbr; - - pcbr = I915_READ(VLV_PCBR); - if (pcbr) { - /* BIOS set it up already, grab the pre-alloc'd space */ - resource_size_t pcbr_offset; - - pcbr_offset = (pcbr & (~4095)) - dev_priv->dsm.start; - pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv, - pcbr_offset, - I915_GTT_OFFSET_NONE, - pctx_size); - goto out; - } - - DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n"); - - /* - * From the Gunit register HAS: - * The Gfx driver is expected to program this register and ensure - * proper allocation within Gfx stolen memory. For example, this - * register should be programmed such than the PCBR range does not - * overlap with other ranges, such as the frame buffer, protected - * memory, or any other relevant ranges. - */ - pctx = i915_gem_object_create_stolen(dev_priv, pctx_size); - if (!pctx) { - DRM_DEBUG("not enough stolen space for PCTX, disabling\n"); - goto out; - } - - GEM_BUG_ON(range_overflows_t(u64, - dev_priv->dsm.start, - pctx->stolen->start, - U32_MAX)); - pctx_paddr = dev_priv->dsm.start + pctx->stolen->start; - I915_WRITE(VLV_PCBR, pctx_paddr); - -out: - DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR)); - dev_priv->vlv_pctx = pctx; -} - -static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv) -{ - struct drm_i915_gem_object *pctx; - - pctx = fetch_and_zero(&dev_priv->vlv_pctx); - if (pctx) - i915_gem_object_put(pctx); -} - static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv) { dev_priv->gt_pm.rps.gpll_ref_freq = @@ -7772,8 +7277,6 @@ static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv) struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 val; - valleyview_setup_pctx(dev_priv); - vlv_iosf_sb_get(dev_priv, BIT(VLV_IOSF_SB_PUNIT) | BIT(VLV_IOSF_SB_NC) | @@ -7828,8 +7331,6 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 val; - cherryview_setup_pctx(dev_priv); - vlv_iosf_sb_get(dev_priv, BIT(VLV_IOSF_SB_PUNIT) | BIT(VLV_IOSF_SB_NC) | @@ -7880,64 +7381,6 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) "Odd GPU freq values\n"); } -static void valleyview_cleanup_gt_powersave(struct drm_i915_private *dev_priv) -{ - valleyview_cleanup_pctx(dev_priv); -} - -static void cherryview_enable_rc6(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - u32 gtfifodbg, rc6_mode, pcbr; - - gtfifodbg = I915_READ(GTFIFODBG) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV | - GT_FIFO_FREE_ENTRIES_CHV); - if (gtfifodbg) { - DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n", - gtfifodbg); - I915_WRITE(GTFIFODBG, gtfifodbg); - } - - cherryview_check_pctx(dev_priv); - - /* 1a & 1b: Get forcewake during program sequence. Although the driver - * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - /* Disable RC states. */ - I915_WRITE(GEN6_RC_CONTROL, 0); - - /* 2a: Program RC6 thresholds.*/ - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); - I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ - I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ - - for_each_engine(engine, dev_priv, id) - I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); - I915_WRITE(GEN6_RC_SLEEP, 0); - - /* TO threshold set to 500 us ( 0x186 * 1.28 us) */ - I915_WRITE(GEN6_RC6_THRESHOLD, 0x186); - - /* Allows RC6 residency counter to work */ - I915_WRITE(VLV_COUNTER_CONTROL, - _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | - VLV_MEDIA_RC6_COUNT_EN | - VLV_RENDER_RC6_COUNT_EN)); - - /* For now we assume BIOS is allocating and populating the PCBR */ - pcbr = I915_READ(VLV_PCBR); - - /* 3: Enable RC6 */ - rc6_mode = 0; - if (pcbr >> VLV_PCBR_ADDR_SHIFT) - rc6_mode = GEN7_RC_CTL_TO_MODE; - I915_WRITE(GEN6_RC_CONTROL, rc6_mode); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - static void cherryview_enable_rps(struct drm_i915_private *dev_priv) { u32 val; @@ -7982,49 +7425,6 @@ static void cherryview_enable_rps(struct drm_i915_private *dev_priv) intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); } -static void valleyview_enable_rc6(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - u32 gtfifodbg; - - valleyview_check_pctx(dev_priv); - - gtfifodbg = I915_READ(GTFIFODBG); - if (gtfifodbg) { - DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n", - gtfifodbg); - I915_WRITE(GTFIFODBG, gtfifodbg); - } - - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - /* Disable RC states. */ - I915_WRITE(GEN6_RC_CONTROL, 0); - - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000); - I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); - I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); - - for_each_engine(engine, dev_priv, id) - I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); - - I915_WRITE(GEN6_RC6_THRESHOLD, 0x557); - - /* Allows RC6 residency counter to work */ - I915_WRITE(VLV_COUNTER_CONTROL, - _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | - VLV_MEDIA_RC0_COUNT_EN | - VLV_RENDER_RC0_COUNT_EN | - VLV_MEDIA_RC6_COUNT_EN | - VLV_RENDER_RC6_COUNT_EN)); - - I915_WRITE(GEN6_RC_CONTROL, - GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - static void valleyview_enable_rps(struct drm_i915_private *dev_priv) { u32 val; @@ -8551,14 +7951,9 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) { struct intel_rps *rps = &dev_priv->gt_pm.rps; - /* - * RPM depends on RC6 to save restore the GT HW context, so make RC6 a - * requirement. - */ - if (!sanitize_rc6(dev_priv)) { - DRM_INFO("RC6 disabled, disabling runtime PM support\n"); - pm_runtime_get(&dev_priv->drm.pdev->dev); - } + /* Powersaving is controlled by the host when inside a VM */ + if (intel_vgpu_active(dev_priv)) + mkwrite_device_info(dev_priv)->has_rps = false; /* Initialize RPS limits (for userspace) */ if (IS_CHERRYVIEW(dev_priv)) @@ -8593,19 +7988,9 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) rps->cur_freq = rps->idle_freq; } -void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv) -{ - if (IS_VALLEYVIEW(dev_priv)) - valleyview_cleanup_gt_powersave(dev_priv); - - if (!HAS_RC6(dev_priv)) - pm_runtime_put(&dev_priv->drm.pdev->dev); -} - void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv) { dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */ - dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */ intel_disable_gt_powersave(dev_priv); if (INTEL_GEN(dev_priv) >= 11) @@ -8626,25 +8011,6 @@ static inline void intel_disable_llc_pstate(struct drm_i915_private *i915) i915->gt_pm.llc_pstate.enabled = false; } -static void intel_disable_rc6(struct drm_i915_private *dev_priv) -{ - lockdep_assert_held(&dev_priv->gt_pm.rps.lock); - - if (!dev_priv->gt_pm.rc6.enabled) - return; - - if (INTEL_GEN(dev_priv) >= 9) - gen9_disable_rc6(dev_priv); - else if (IS_CHERRYVIEW(dev_priv)) - cherryview_disable_rc6(dev_priv); - else if (IS_VALLEYVIEW(dev_priv)) - valleyview_disable_rc6(dev_priv); - else if (INTEL_GEN(dev_priv) >= 6) - gen6_disable_rc6(dev_priv); - - dev_priv->gt_pm.rc6.enabled = false; -} - static void intel_disable_rps(struct drm_i915_private *dev_priv) { lockdep_assert_held(&dev_priv->gt_pm.rps.lock); @@ -8670,8 +8036,6 @@ void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) { mutex_lock(&dev_priv->gt_pm.rps.lock); - intel_disable_rc6(dev_priv); - intel_disable_rps(dev_priv); if (HAS_LLC(dev_priv)) intel_disable_llc_pstate(dev_priv); @@ -8691,29 +8055,6 @@ static inline void intel_enable_llc_pstate(struct drm_i915_private *i915) i915->gt_pm.llc_pstate.enabled = true; } -static void intel_enable_rc6(struct drm_i915_private *dev_priv) -{ - lockdep_assert_held(&dev_priv->gt_pm.rps.lock); - - if (dev_priv->gt_pm.rc6.enabled) - return; - - if (IS_CHERRYVIEW(dev_priv)) - cherryview_enable_rc6(dev_priv); - else if (IS_VALLEYVIEW(dev_priv)) - valleyview_enable_rc6(dev_priv); - else if (INTEL_GEN(dev_priv) >= 11) - gen11_enable_rc6(dev_priv); - else if (INTEL_GEN(dev_priv) >= 9) - gen9_enable_rc6(dev_priv); - else if (IS_BROADWELL(dev_priv)) - gen8_enable_rc6(dev_priv); - else if (INTEL_GEN(dev_priv) >= 6) - gen6_enable_rc6(dev_priv); - - dev_priv->gt_pm.rc6.enabled = true; -} - static void intel_enable_rps(struct drm_i915_private *dev_priv) { struct intel_rps *rps = &dev_priv->gt_pm.rps; @@ -8755,8 +8096,6 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->gt_pm.rps.lock); - if (HAS_RC6(dev_priv)) - intel_enable_rc6(dev_priv); if (HAS_RPS(dev_priv)) intel_enable_rps(dev_priv); if (HAS_LLC(dev_priv)) @@ -9818,133 +9157,6 @@ void intel_pm_setup(struct drm_i915_private *dev_priv) atomic_set(&dev_priv->runtime_pm.wakeref_count, 0); } -static u64 vlv_residency_raw(struct drm_i915_private *dev_priv, - const i915_reg_t reg) -{ - u32 lower, upper, tmp; - int loop = 2; - - /* - * The register accessed do not need forcewake. We borrow - * uncore lock to prevent concurrent access to range reg. - */ - lockdep_assert_held(&dev_priv->uncore.lock); - - /* - * vlv and chv residency counters are 40 bits in width. - * With a control bit, we can choose between upper or lower - * 32bit window into this counter. - * - * Although we always use the counter in high-range mode elsewhere, - * userspace may attempt to read the value before rc6 is initialised, - * before we have set the default VLV_COUNTER_CONTROL value. So always - * set the high bit to be safe. - */ - I915_WRITE_FW(VLV_COUNTER_CONTROL, - _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH)); - upper = I915_READ_FW(reg); - do { - tmp = upper; - - I915_WRITE_FW(VLV_COUNTER_CONTROL, - _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH)); - lower = I915_READ_FW(reg); - - I915_WRITE_FW(VLV_COUNTER_CONTROL, - _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH)); - upper = I915_READ_FW(reg); - } while (upper != tmp && --loop); - - /* - * Everywhere else we always use VLV_COUNTER_CONTROL with the - * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set - * now. - */ - - return lower | (u64)upper << 8; -} - -u64 intel_rc6_residency_ns(struct drm_i915_private *dev_priv, - const i915_reg_t reg) -{ - struct intel_uncore *uncore = &dev_priv->uncore; - u64 time_hw, prev_hw, overflow_hw; - unsigned int fw_domains; - unsigned long flags; - unsigned int i; - u32 mul, div; - - if (!HAS_RC6(dev_priv)) - return 0; - - /* - * Store previous hw counter values for counter wrap-around handling. - * - * There are only four interesting registers and they live next to each - * other so we can use the relative address, compared to the smallest - * one as the index into driver storage. - */ - i = (i915_mmio_reg_offset(reg) - - i915_mmio_reg_offset(GEN6_GT_GFX_RC6_LOCKED)) / sizeof(u32); - if (WARN_ON_ONCE(i >= ARRAY_SIZE(dev_priv->gt_pm.rc6.cur_residency))) - return 0; - - fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, FW_REG_READ); - - spin_lock_irqsave(&uncore->lock, flags); - intel_uncore_forcewake_get__locked(uncore, fw_domains); - - /* On VLV and CHV, residency time is in CZ units rather than 1.28us */ - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - mul = 1000000; - div = dev_priv->czclk_freq; - overflow_hw = BIT_ULL(40); - time_hw = vlv_residency_raw(dev_priv, reg); - } else { - /* 833.33ns units on Gen9LP, 1.28us elsewhere. */ - if (IS_GEN9_LP(dev_priv)) { - mul = 10000; - div = 12; - } else { - mul = 1280; - div = 1; - } - - overflow_hw = BIT_ULL(32); - time_hw = intel_uncore_read_fw(uncore, reg); - } - - /* - * Counter wrap handling. - * - * But relying on a sufficient frequency of queries otherwise counters - * can still wrap. - */ - prev_hw = dev_priv->gt_pm.rc6.prev_hw_residency[i]; - dev_priv->gt_pm.rc6.prev_hw_residency[i] = time_hw; - - /* RC6 delta from last sample. */ - if (time_hw >= prev_hw) - time_hw -= prev_hw; - else - time_hw += overflow_hw - prev_hw; - - /* Add delta to RC6 extended raw driver copy. */ - time_hw += dev_priv->gt_pm.rc6.cur_residency[i]; - dev_priv->gt_pm.rc6.cur_residency[i] = time_hw; - - intel_uncore_forcewake_put__locked(uncore, fw_domains); - spin_unlock_irqrestore(&uncore->lock, flags); - - return mul_u64_u32_div(time_hw, mul, div); -} - -u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv, - i915_reg_t reg) -{ - return DIV_ROUND_UP_ULL(intel_rc6_residency_ns(dev_priv, reg), 1000); -} - u32 intel_get_cagf(struct drm_i915_private *dev_priv, u32 rpstat) { u32 cagf; diff --git a/drivers/gpu/drm/i915/intel_pm.h b/drivers/gpu/drm/i915/intel_pm.h index e3573e1e16e38..93d192d0610ae 100644 --- a/drivers/gpu/drm/i915/intel_pm.h +++ b/drivers/gpu/drm/i915/intel_pm.h @@ -32,7 +32,6 @@ void intel_pm_setup(struct drm_i915_private *dev_priv); void intel_gpu_ips_init(struct drm_i915_private *dev_priv); void intel_gpu_ips_teardown(void); void intel_init_gt_powersave(struct drm_i915_private *dev_priv); -void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv); void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv); void intel_enable_gt_powersave(struct drm_i915_private *dev_priv); void intel_disable_gt_powersave(struct drm_i915_private *dev_priv); @@ -72,8 +71,6 @@ void intel_enable_ipc(struct drm_i915_private *dev_priv); int intel_gpu_freq(struct drm_i915_private *dev_priv, int val); int intel_freq_opcode(struct drm_i915_private *dev_priv, int val); -u64 intel_rc6_residency_ns(struct drm_i915_private *dev_priv, i915_reg_t reg); -u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv, i915_reg_t reg); u32 intel_get_cagf(struct drm_i915_private *dev_priv, u32 rpstat1); diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index 66d83c1390c16..6713efea350b7 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -16,6 +16,7 @@ selftest(gt_engines, intel_engine_live_selftests) selftest(gt_timelines, intel_timeline_live_selftests) selftest(gt_contexts, intel_context_live_selftests) selftest(gt_lrc, intel_lrc_live_selftests) +selftest(gt_pm, intel_gt_pm_live_selftests) selftest(requests, i915_request_live_selftests) selftest(active, i915_active_live_selftests) selftest(objects, i915_gem_object_live_selftests) From 9cd6c339e34ab13479fb139b0b9f5ffc9743bfc1 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Fri, 27 Sep 2019 15:25:54 +0300 Subject: [PATCH 269/331] drm/i915: Update DRIVER_DATE to 20190927 Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0e4e77373ee56..c0f6c1f72b621 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -106,8 +106,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20190822" -#define DRIVER_TIMESTAMP 1566477988 +#define DRIVER_DATE "20190927" +#define DRIVER_TIMESTAMP 1569587154 struct drm_i915_gem_object; From 901045c3f0f43566f9a8a2f5716c2c0a497979e6 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Wed, 25 Sep 2019 15:21:21 -0700 Subject: [PATCH 270/331] drm/i915/huc: fix version parsing from CSS header The HuC FW has silently switched to encoding the version the same way as the GuC FW does, i.e. major.minor.patch instead of just major.minor. All the current blobs follow the new scheme, but since minor and patch are both zero there is no difference in the end results and we happily load them. New binaries, however, will have non-zero values in there, so we need to make sure to parse them correctly. Signed-off-by: Daniele Ceraolo Spurio Cc: Anusha Srivatsa Cc: Michal Wajdeczko Reviewed-by: Stuart Summers Acked-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20190925222121.4000-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 23 ++++---------------- drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h | 8 +++---- 2 files changed, 7 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index ea9a807abd4f3..bb878119f06ca 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -339,25 +339,10 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw, struct drm_i915_private *i915) } /* Get version numbers from the CSS header */ - switch (uc_fw->type) { - case INTEL_UC_FW_TYPE_GUC: - uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_GUC_MAJOR, - css->sw_version); - uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_GUC_MINOR, - css->sw_version); - break; - - case INTEL_UC_FW_TYPE_HUC: - uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_HUC_MAJOR, - css->sw_version); - uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_HUC_MINOR, - css->sw_version); - break; - - default: - MISSING_CASE(uc_fw->type); - break; - } + uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, + css->sw_version); + uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MINOR, + css->sw_version); if (uc_fw->major_ver_found != uc_fw->major_ver_wanted || uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h index ae58e8a8c53b1..f8f6c91a0df6c 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h @@ -69,11 +69,9 @@ struct uc_css_header { char username[8]; char buildnumber[12]; u32 sw_version; -#define CSS_SW_VERSION_GUC_MAJOR (0xFF << 16) -#define CSS_SW_VERSION_GUC_MINOR (0xFF << 8) -#define CSS_SW_VERSION_GUC_PATCH (0xFF << 0) -#define CSS_SW_VERSION_HUC_MAJOR (0xFFFF << 16) -#define CSS_SW_VERSION_HUC_MINOR (0xFFFF << 0) +#define CSS_SW_VERSION_UC_MAJOR (0xFF << 16) +#define CSS_SW_VERSION_UC_MINOR (0xFF << 8) +#define CSS_SW_VERSION_UC_PATCH (0xFF << 0) u32 reserved[14]; u32 header_info; } __packed; From 283a4095af9dd95e5d6d9f3e7f8b66a4b9f84082 Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Wed, 25 Sep 2019 13:12:50 -0700 Subject: [PATCH 271/331] drm/i915/dmc: Update ICL DMC version to v1.09 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have a new version of DMC for ICL - v1.09. This version adds the Half Refresh Rate capability into DMC. Cc: José Roberto de Souza Signed-off-by: Anusha Srivatsa Reviewed-by: José Roberto de Souza Signed-off-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20190925201250.18136-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/intel_csr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index 546577e39b4ee..09870a31b4f0f 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -44,8 +44,8 @@ #define TGL_CSR_MAX_FW_SIZE 0x6000 MODULE_FIRMWARE(TGL_CSR_PATH); -#define ICL_CSR_PATH "i915/icl_dmc_ver1_07.bin" -#define ICL_CSR_VERSION_REQUIRED CSR_VERSION(1, 7) +#define ICL_CSR_PATH "i915/icl_dmc_ver1_09.bin" +#define ICL_CSR_VERSION_REQUIRED CSR_VERSION(1, 9) #define ICL_CSR_MAX_FW_SIZE 0x6000 MODULE_FIRMWARE(ICL_CSR_PATH); From 3b51be4e4061bd5e0f1b73f56cfecaa879c76d51 Mon Sep 17 00:00:00 2001 From: Clinton A Taylor Date: Thu, 26 Sep 2019 14:06:56 -0700 Subject: [PATCH 272/331] drm/i915/tc: Update DP_MODE programming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BSpec was updated(r146548) with a new MG_DP_MODE Programming table, now taking in consideration the pin assignment and allowing us to optimize power by shutting down available but not needed lanes. It was tested on ICL and TGL, with adaptors that used pin assignment C and B, reversing the connector and going to different modes testing the not needed lane shutdown. v5: Using crtc_state->lane_count instead of dp.lane_count BSpec: 21735 BSpec: 49292 Cc: Imre Deak Cc: Lucas De Marchi Reviewed-by: Imre Deak Signed-off-by: Clinton A Taylor Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20190926210659.56317-1-jose.souza@intel.com --- drivers/gpu/drm/i915/display/intel_ddi.c | 92 +++++++++++++----------- drivers/gpu/drm/i915/display/intel_tc.c | 15 ++++ drivers/gpu/drm/i915/display/intel_tc.h | 1 + drivers/gpu/drm/i915/i915_reg.h | 5 ++ 4 files changed, 72 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index aa470c70a1989..20393d39a29a6 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -3091,11 +3091,14 @@ icl_phy_set_clock_gating(struct intel_digital_port *dig_port, bool enable) I915_WRITE(MG_MISC_SUS0(tc_port), val); } -static void icl_program_mg_dp_mode(struct intel_digital_port *intel_dig_port) +static void +icl_program_mg_dp_mode(struct intel_digital_port *intel_dig_port, + const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); enum port port = intel_dig_port->base.port; - u32 ln0, ln1, lane_mask; + u32 ln0, ln1, pin_assignment; + u8 width; if (intel_dig_port->tc_mode == TC_PORT_TBT_ALT) return; @@ -3103,50 +3106,57 @@ static void icl_program_mg_dp_mode(struct intel_digital_port *intel_dig_port) ln0 = I915_READ(MG_DP_MODE(0, port)); ln1 = I915_READ(MG_DP_MODE(1, port)); - switch (intel_dig_port->tc_mode) { - case TC_PORT_DP_ALT: - ln0 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X2_MODE); - ln1 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X2_MODE); + ln0 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X1_MODE); + ln1 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X2_MODE); - lane_mask = intel_tc_port_get_lane_mask(intel_dig_port); + /* DPPATC */ + pin_assignment = intel_tc_port_get_pin_assignment_mask(intel_dig_port); + width = crtc_state->lane_count; - switch (lane_mask) { - case 0x1: - case 0x4: - break; - case 0x2: + switch (pin_assignment) { + case 0x0: + WARN_ON(intel_dig_port->tc_mode != TC_PORT_LEGACY); + if (width == 1) { + ln1 |= MG_DP_MODE_CFG_DP_X1_MODE; + } else { + ln0 |= MG_DP_MODE_CFG_DP_X2_MODE; + ln1 |= MG_DP_MODE_CFG_DP_X2_MODE; + } + break; + case 0x1: + if (width == 4) { + ln0 |= MG_DP_MODE_CFG_DP_X2_MODE; + ln1 |= MG_DP_MODE_CFG_DP_X2_MODE; + } + break; + case 0x2: + if (width == 2) { + ln0 |= MG_DP_MODE_CFG_DP_X2_MODE; + ln1 |= MG_DP_MODE_CFG_DP_X2_MODE; + } + break; + case 0x3: + case 0x5: + if (width == 1) { ln0 |= MG_DP_MODE_CFG_DP_X1_MODE; - break; - case 0x3: - ln0 |= MG_DP_MODE_CFG_DP_X1_MODE | - MG_DP_MODE_CFG_DP_X2_MODE; - break; - case 0x8: ln1 |= MG_DP_MODE_CFG_DP_X1_MODE; - break; - case 0xC: - ln1 |= MG_DP_MODE_CFG_DP_X1_MODE | - MG_DP_MODE_CFG_DP_X2_MODE; - break; - case 0xF: - ln0 |= MG_DP_MODE_CFG_DP_X1_MODE | - MG_DP_MODE_CFG_DP_X2_MODE; - ln1 |= MG_DP_MODE_CFG_DP_X1_MODE | - MG_DP_MODE_CFG_DP_X2_MODE; - break; - default: - MISSING_CASE(lane_mask); + } else { + ln0 |= MG_DP_MODE_CFG_DP_X2_MODE; + ln1 |= MG_DP_MODE_CFG_DP_X2_MODE; } break; - - case TC_PORT_LEGACY: - ln0 |= MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X2_MODE; - ln1 |= MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X2_MODE; + case 0x4: + case 0x6: + if (width == 1) { + ln0 |= MG_DP_MODE_CFG_DP_X1_MODE; + ln1 |= MG_DP_MODE_CFG_DP_X1_MODE; + } else { + ln0 |= MG_DP_MODE_CFG_DP_X2_MODE; + ln1 |= MG_DP_MODE_CFG_DP_X2_MODE; + } break; - default: - MISSING_CASE(intel_dig_port->tc_mode); - return; + MISSING_CASE(pin_assignment); } I915_WRITE(MG_DP_MODE(0, port), ln0); @@ -3239,7 +3249,7 @@ static void tgl_ddi_pre_enable_dp(struct intel_encoder *encoder, dig_port->ddi_io_power_domain); /* 6. */ - icl_program_mg_dp_mode(dig_port); + icl_program_mg_dp_mode(dig_port, crtc_state); /* * 7.a - Steps in this function should only be executed over MST @@ -3321,7 +3331,7 @@ static void hsw_ddi_pre_enable_dp(struct intel_encoder *encoder, intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain); - icl_program_mg_dp_mode(dig_port); + icl_program_mg_dp_mode(dig_port, crtc_state); icl_phy_set_clock_gating(dig_port, false); if (INTEL_GEN(dev_priv) >= 11) @@ -3391,7 +3401,7 @@ static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain); - icl_program_mg_dp_mode(dig_port); + icl_program_mg_dp_mode(dig_port, crtc_state); icl_phy_set_clock_gating(dig_port, false); if (INTEL_GEN(dev_priv) >= 11) diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index f923f9cbd33c3..7773169b7331b 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -67,6 +67,21 @@ u32 intel_tc_port_get_lane_mask(struct intel_digital_port *dig_port) return lane_mask >> DP_LANE_ASSIGNMENT_SHIFT(dig_port->tc_phy_fia_idx); } +u32 intel_tc_port_get_pin_assignment_mask(struct intel_digital_port *dig_port) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + struct intel_uncore *uncore = &i915->uncore; + u32 pin_mask; + + pin_mask = intel_uncore_read(uncore, + PORT_TX_DFLEXPA1(dig_port->tc_phy_fia)); + + WARN_ON(pin_mask == 0xffffffff); + + return (pin_mask & DP_PIN_ASSIGNMENT_MASK(dig_port->tc_phy_fia_idx)) >> + DP_PIN_ASSIGNMENT_SHIFT(dig_port->tc_phy_fia_idx); +} + int intel_tc_port_fia_max_lane_count(struct intel_digital_port *dig_port) { struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); diff --git a/drivers/gpu/drm/i915/display/intel_tc.h b/drivers/gpu/drm/i915/display/intel_tc.h index 783d755314357..463f1b3c836f0 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.h +++ b/drivers/gpu/drm/i915/display/intel_tc.h @@ -13,6 +13,7 @@ struct intel_digital_port; bool intel_tc_port_connected(struct intel_digital_port *dig_port); u32 intel_tc_port_get_lane_mask(struct intel_digital_port *dig_port); +u32 intel_tc_port_get_pin_assignment_mask(struct intel_digital_port *dig_port); int intel_tc_port_fia_max_lane_count(struct intel_digital_port *dig_port); void intel_tc_port_set_fia_lane_count(struct intel_digital_port *dig_port, int required_lanes); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 5e30f1a146240..014fa5dbde83c 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -11861,6 +11861,11 @@ enum skl_power_gate { #define PORT_TX_DFLEXDPCSSS(fia) _MMIO_FIA((fia), 0x00894) #define DP_PHY_MODE_STATUS_NOT_SAFE(idx) (1 << (idx)) +#define PORT_TX_DFLEXPA1(fia) _MMIO_FIA((fia), 0x00880) +#define DP_PIN_ASSIGNMENT_SHIFT(idx) ((idx) * 4) +#define DP_PIN_ASSIGNMENT_MASK(idx) (0xf << ((idx) * 4)) +#define DP_PIN_ASSIGNMENT(idx, x) ((x) << ((idx) * 4)) + /* This register controls the Display State Buffer (DSB) engines. */ #define _DSBSL_INSTANCE_BASE 0x70B00 #define DSBSL_INSTANCE(pipe, id) (_DSBSL_INSTANCE_BASE + \ From 978c3e539be210268b6c6cfeef0c97e18c00d7a7 Mon Sep 17 00:00:00 2001 From: Clinton A Taylor Date: Thu, 26 Sep 2019 14:06:57 -0700 Subject: [PATCH 273/331] drm/i915/tgl: Add dkl phy programming sequences MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added DKL Phy sequences and helpers functions to program voltage swing, clock gating and dp mode. It is not written in DP enabling sequence but "PHY Clockgating programming" states that clock gating should be enabled after the link training but doing so causes all the following trainings to fail so not enabling it for. v2: Setting the right HIP_INDEX_REG bits (José) v3: Adding the meaning of each column of tgl_dkl_phy_ddi_translations Adding if gen >= 12 on intel_ddi_hdmi_level() and intel_ddi_pre_enable_hdmi() instead of reuse part of gen >= 11 if v4: Moved the DP_MODE lane programing to another patch as ICL also needed it Sharing icl_phy_set_clock_gating() and icl_program_mg_dp_mode() with TGL as bits and programing as now it almost identical to ICL BSpec: 49292 BSpec: 49190 Cc: Imre Deak Cc: Lucas De Marchi Reviewed-by: Imre Deak Signed-off-by: José Roberto de Souza Signed-off-by: Clinton A Taylor Link: https://patchwork.freedesktop.org/patch/msgid/20190926210659.56317-2-jose.souza@intel.com --- drivers/gpu/drm/i915/display/intel_ddi.c | 174 ++++++++++++++++++++--- drivers/gpu/drm/i915/i915_reg.h | 8 -- 2 files changed, 154 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 20393d39a29a6..b463e51f8b459 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -586,6 +586,26 @@ static const struct icl_mg_phy_ddi_buf_trans icl_mg_phy_ddi_translations[] = { { 0x0, 0x00, 0x00 }, /* 3 0 */ }; +struct tgl_dkl_phy_ddi_buf_trans { + u32 dkl_vswing_control; + u32 dkl_preshoot_control; + u32 dkl_de_emphasis_control; +}; + +static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_ddi_translations[] = { + /* VS pre-emp Non-trans mV Pre-emph dB */ + { 0x7, 0x0, 0x00 }, /* 0 0 400mV 0 dB */ + { 0x5, 0x0, 0x03 }, /* 0 1 400mV 3.5 dB */ + { 0x2, 0x0, 0x0b }, /* 0 2 400mV 6 dB */ + { 0x0, 0x0, 0x19 }, /* 0 3 400mV 9.5 dB */ + { 0x5, 0x0, 0x00 }, /* 1 0 600mV 0 dB */ + { 0x2, 0x0, 0x03 }, /* 1 1 600mV 3.5 dB */ + { 0x0, 0x0, 0x14 }, /* 1 2 600mV 6 dB */ + { 0x2, 0x0, 0x00 }, /* 2 0 800mV 0 dB */ + { 0x0, 0x0, 0x0B }, /* 2 1 800mV 3.5 dB */ + { 0x0, 0x0, 0x00 }, /* 3 0 1200mV 0 dB HDMI default */ +}; + static const struct ddi_buf_trans * bdw_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries) { @@ -872,7 +892,14 @@ static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port por level = dev_priv->vbt.ddi_port_info[port].hdmi_level_shift; - if (INTEL_GEN(dev_priv) >= 11) { + if (INTEL_GEN(dev_priv) >= 12) { + if (intel_phy_is_combo(dev_priv, phy)) + icl_get_combo_buf_trans(dev_priv, INTEL_OUTPUT_HDMI, + 0, &n_entries); + else + n_entries = ARRAY_SIZE(tgl_dkl_phy_ddi_translations); + default_entry = n_entries - 1; + } else if (INTEL_GEN(dev_priv) == 11) { if (intel_phy_is_combo(dev_priv, phy)) icl_get_combo_buf_trans(dev_priv, INTEL_OUTPUT_HDMI, 0, &n_entries); @@ -2334,7 +2361,13 @@ u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder) enum phy phy = intel_port_to_phy(dev_priv, port); int n_entries; - if (INTEL_GEN(dev_priv) >= 11) { + if (INTEL_GEN(dev_priv) >= 12) { + if (intel_phy_is_combo(dev_priv, phy)) + icl_get_combo_buf_trans(dev_priv, encoder->type, + intel_dp->link_rate, &n_entries); + else + n_entries = ARRAY_SIZE(tgl_dkl_phy_ddi_translations); + } else if (INTEL_GEN(dev_priv) == 11) { if (intel_phy_is_combo(dev_priv, phy)) icl_get_combo_buf_trans(dev_priv, encoder->type, intel_dp->link_rate, &n_entries); @@ -2776,6 +2809,62 @@ static void icl_ddi_vswing_sequence(struct intel_encoder *encoder, icl_mg_phy_ddi_vswing_sequence(encoder, link_clock, level); } +static void +tgl_dkl_phy_ddi_vswing_sequence(struct intel_encoder *encoder, int link_clock, + u32 level) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum tc_port tc_port = intel_port_to_tc(dev_priv, encoder->port); + const struct tgl_dkl_phy_ddi_buf_trans *ddi_translations; + u32 n_entries, val, ln, dpcnt_mask, dpcnt_val; + + n_entries = ARRAY_SIZE(tgl_dkl_phy_ddi_translations); + ddi_translations = tgl_dkl_phy_ddi_translations; + + if (level >= n_entries) + level = n_entries - 1; + + dpcnt_mask = (DKL_TX_PRESHOOT_COEFF_MASK | + DKL_TX_DE_EMPAHSIS_COEFF_MASK | + DKL_TX_VSWING_CONTROL_MASK); + dpcnt_val = DKL_TX_VSWING_CONTROL(ddi_translations[level].dkl_vswing_control); + dpcnt_val |= DKL_TX_DE_EMPHASIS_COEFF(ddi_translations[level].dkl_de_emphasis_control); + dpcnt_val |= DKL_TX_PRESHOOT_COEFF(ddi_translations[level].dkl_preshoot_control); + + for (ln = 0; ln < 2; ln++) { + I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, ln)); + + /* All the registers are RMW */ + val = I915_READ(DKL_TX_DPCNTL0(tc_port)); + val &= ~dpcnt_mask; + val |= dpcnt_val; + I915_WRITE(DKL_TX_DPCNTL0(tc_port), val); + + val = I915_READ(DKL_TX_DPCNTL1(tc_port)); + val &= ~dpcnt_mask; + val |= dpcnt_val; + I915_WRITE(DKL_TX_DPCNTL1(tc_port), val); + + val = I915_READ(DKL_TX_DPCNTL2(tc_port)); + val &= ~DKL_TX_DP20BITMODE; + I915_WRITE(DKL_TX_DPCNTL2(tc_port), val); + } +} + +static void tgl_ddi_vswing_sequence(struct intel_encoder *encoder, + int link_clock, + u32 level, + enum intel_output_type type) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum phy phy = intel_port_to_phy(dev_priv, encoder->port); + + if (intel_phy_is_combo(dev_priv, phy)) + icl_combo_phy_ddi_vswing_sequence(encoder, level, type); + else + tgl_dkl_phy_ddi_vswing_sequence(encoder, link_clock, level); +} + static u32 translate_signal_level(int signal_levels) { int i; @@ -2807,7 +2896,10 @@ u32 bxt_signal_levels(struct intel_dp *intel_dp) struct intel_encoder *encoder = &dport->base; int level = intel_ddi_dp_level(intel_dp); - if (INTEL_GEN(dev_priv) >= 11) + if (INTEL_GEN(dev_priv) >= 12) + tgl_ddi_vswing_sequence(encoder, intel_dp->link_rate, + level, encoder->type); + else if (INTEL_GEN(dev_priv) >= 11) icl_ddi_vswing_sequence(encoder, intel_dp->link_rate, level, encoder->type); else if (IS_CANNONLAKE(dev_priv)) @@ -3071,24 +3163,39 @@ icl_phy_set_clock_gating(struct intel_digital_port *dig_port, bool enable) MG_DP_MODE_CFG_GAONPWR_GATING; for (ln = 0; ln < 2; ln++) { - val = I915_READ(MG_DP_MODE(ln, port)); + if (INTEL_GEN(dev_priv) >= 12) { + I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, ln)); + val = I915_READ(DKL_DP_MODE(tc_port)); + } else { + val = I915_READ(MG_DP_MODE(ln, port)); + } + if (enable) val |= bits; else val &= ~bits; - I915_WRITE(MG_DP_MODE(ln, port), val); + + if (INTEL_GEN(dev_priv) >= 12) + I915_WRITE(DKL_DP_MODE(tc_port), val); + else + I915_WRITE(MG_DP_MODE(ln, port), val); } - bits = MG_MISC_SUS0_CFG_TR2PWR_GATING | MG_MISC_SUS0_CFG_CL2PWR_GATING | - MG_MISC_SUS0_CFG_GAONPWR_GATING | MG_MISC_SUS0_CFG_TRPWR_GATING | - MG_MISC_SUS0_CFG_CL1PWR_GATING | MG_MISC_SUS0_CFG_DGPWR_GATING; + if (INTEL_GEN(dev_priv) == 11) { + bits = MG_MISC_SUS0_CFG_TR2PWR_GATING | + MG_MISC_SUS0_CFG_CL2PWR_GATING | + MG_MISC_SUS0_CFG_GAONPWR_GATING | + MG_MISC_SUS0_CFG_TRPWR_GATING | + MG_MISC_SUS0_CFG_CL1PWR_GATING | + MG_MISC_SUS0_CFG_DGPWR_GATING; - val = I915_READ(MG_MISC_SUS0(tc_port)); - if (enable) - val |= (bits | MG_MISC_SUS0_SUSCLK_DYNCLKGATE_MODE(3)); - else - val &= ~(bits | MG_MISC_SUS0_SUSCLK_DYNCLKGATE_MODE_MASK); - I915_WRITE(MG_MISC_SUS0(tc_port), val); + val = I915_READ(MG_MISC_SUS0(tc_port)); + if (enable) + val |= (bits | MG_MISC_SUS0_SUSCLK_DYNCLKGATE_MODE(3)); + else + val &= ~(bits | MG_MISC_SUS0_SUSCLK_DYNCLKGATE_MODE_MASK); + I915_WRITE(MG_MISC_SUS0(tc_port), val); + } } static void @@ -3097,14 +3204,22 @@ icl_program_mg_dp_mode(struct intel_digital_port *intel_dig_port, { struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); enum port port = intel_dig_port->base.port; + enum tc_port tc_port = intel_port_to_tc(dev_priv, port); u32 ln0, ln1, pin_assignment; u8 width; if (intel_dig_port->tc_mode == TC_PORT_TBT_ALT) return; - ln0 = I915_READ(MG_DP_MODE(0, port)); - ln1 = I915_READ(MG_DP_MODE(1, port)); + if (INTEL_GEN(dev_priv) >= 12) { + I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, 0x0)); + ln0 = I915_READ(DKL_DP_MODE(tc_port)); + I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, 0x1)); + ln1 = I915_READ(DKL_DP_MODE(tc_port)); + } else { + ln0 = I915_READ(MG_DP_MODE(0, port)); + ln1 = I915_READ(MG_DP_MODE(1, port)); + } ln0 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X1_MODE); ln1 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X2_MODE); @@ -3159,8 +3274,15 @@ icl_program_mg_dp_mode(struct intel_digital_port *intel_dig_port, MISSING_CASE(pin_assignment); } - I915_WRITE(MG_DP_MODE(0, port), ln0); - I915_WRITE(MG_DP_MODE(1, port), ln1); + if (INTEL_GEN(dev_priv) >= 12) { + I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, 0x0)); + I915_WRITE(DKL_DP_MODE(tc_port), ln0); + I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, 0x1)); + I915_WRITE(DKL_DP_MODE(tc_port), ln1); + } else { + I915_WRITE(MG_DP_MODE(0, port), ln0); + I915_WRITE(MG_DP_MODE(1, port), ln1); + } } static void intel_dp_sink_set_fec_ready(struct intel_dp *intel_dp, @@ -3265,7 +3387,7 @@ static void tgl_ddi_pre_enable_dp(struct intel_encoder *encoder, icl_phy_set_clock_gating(dig_port, false); /* 7.e */ - icl_ddi_vswing_sequence(encoder, crtc_state->port_clock, level, + tgl_ddi_vswing_sequence(encoder, crtc_state->port_clock, level, encoder->type); /* 7.f */ @@ -3297,6 +3419,15 @@ static void tgl_ddi_pre_enable_dp(struct intel_encoder *encoder, /* 7.k */ intel_dp_stop_link_train(intel_dp); + /* + * TODO: enable clock gating + * + * It is not written in DP enabling sequence but "PHY Clockgating + * programming" states that clock gating should be enabled after the + * link training but doing so causes all the following trainings to fail + * so not enabling it for now. + */ + /* 7.l */ intel_ddi_enable_fec(encoder, crtc_state); intel_dsc_enable(encoder, crtc_state); @@ -3404,7 +3535,10 @@ static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, icl_program_mg_dp_mode(dig_port, crtc_state); icl_phy_set_clock_gating(dig_port, false); - if (INTEL_GEN(dev_priv) >= 11) + if (INTEL_GEN(dev_priv) >= 12) + tgl_ddi_vswing_sequence(encoder, crtc_state->port_clock, + level, INTEL_OUTPUT_HDMI); + else if (INTEL_GEN(dev_priv) == 11) icl_ddi_vswing_sequence(encoder, crtc_state->port_clock, level, INTEL_OUTPUT_HDMI); else if (IS_CANNONLAKE(dev_priv)) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 014fa5dbde83c..058aa5ca8b73f 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -10249,14 +10249,6 @@ enum skl_power_gate { _DKL_TX_DW18) #define _DKL_DP_MODE 0xA0 -#define DKL_DP_MODE_CFG_GAONPWR_GATING (1 << 1) -#define DKL_DP_MODE_CFG_DIGPWR_GATING (1 << 2) -#define DKL_DP_MODE_CFG_CLNPWR_GATING (1 << 3) -#define DKL_DP_MODE_CFG_TRPWR_GATING (1 << 4) -#define DKL_DP_MODE_CFG_TR2PWR_GATING (1 << 5) -#define DKL_DP_MODE_CFG_GATING_CTRL_MASK (0x1f << 1) -#define DKL_DP_MODE_CFG_DP_X1_MODE (1 << 6) -#define DKL_DP_MODE_CFG_DP_X2_MODE (1 << 7) #define DKL_DP_MODE(tc_port) _MMIO(_PORT(tc_port, \ _DKL_PHY1_BASE, \ _DKL_PHY2_BASE) + \ From a839136ca47f79dcdb403cd7a0c5f4798466ac48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Thu, 26 Sep 2019 14:06:58 -0700 Subject: [PATCH 274/331] drm/i915/tgl: Fix dkl link training MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Link training is failling when running link at 2.7GHz and 1.62GHz and following BSpec pll algorithm. Comparing the values calculated and the ones from the reference table it looks like MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO should not always set to 5. For DP ports ICL mg pll algorithm sets it to 10 or 5 based on div2 value, that matches with dkl hardcoded table. So implementing this way as it proved to work in HW and leaving a comment so we know why it do not match BSpec. v4: Using the same is_dp check as ICL, need testing on HDMI over tc port Issue reported on BSpec 49204. Reviewed-by: Imre Deak Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20190926210659.56317-3-jose.souza@intel.com --- drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 69abafa45ce92..be69a23442940 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -2630,13 +2630,13 @@ static bool icl_mg_pll_find_divisors(int clock_khz, bool is_dp, bool use_ssc, continue; if (div2 >= 2) { - if (is_dkl) { - a_divratio = 5; - tlinedrv = 1; - } else { - a_divratio = is_dp ? 10 : 5; - tlinedrv = 2; - } + /* + * Note: a_divratio not matching TGL BSpec + * algorithm but matching hardcoded values and + * working on HW for DP alt-mode at least + */ + a_divratio = is_dp ? 10 : 5; + tlinedrv = is_dkl ? 1 : 2; } else { a_divratio = 5; tlinedrv = 0; From f663769a5eefbeb304a7b4749fdd28c24944d9a7 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 26 Sep 2019 14:06:59 -0700 Subject: [PATCH 275/331] drm/i915/tgl: initialize TC and TBT ports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that TC support was added, initialize DDIs. Reviewed-by: José Roberto de Souza Acked-by: Lucas De Marchi Signed-off-by: Lucas De Marchi Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20190926210659.56317-4-jose.souza@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 8f125f1624bd9..bbe088b9d0579 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -15340,9 +15340,14 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) return; if (INTEL_GEN(dev_priv) >= 12) { - /* TODO: initialize TC ports as well */ intel_ddi_init(dev_priv, PORT_A); intel_ddi_init(dev_priv, PORT_B); + intel_ddi_init(dev_priv, PORT_D); + intel_ddi_init(dev_priv, PORT_E); + intel_ddi_init(dev_priv, PORT_F); + intel_ddi_init(dev_priv, PORT_G); + intel_ddi_init(dev_priv, PORT_H); + intel_ddi_init(dev_priv, PORT_I); icl_dsi_init(dev_priv); } else if (IS_ELKHARTLAKE(dev_priv)) { intel_ddi_init(dev_priv, PORT_A); From b178a3f68128383a1ad2988464f0e964712563f6 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 27 Sep 2019 18:33:48 +0100 Subject: [PATCH 276/331] drm/i915: check for kernel_context Explosions during early driver init on the error path. Make sure we fail gracefully. [ 9547.672258] BUG: kernel NULL pointer dereference, address: 000000000000007c [ 9547.672288] #PF: supervisor read access in kernel mode [ 9547.672292] #PF: error_code(0x0000) - not-present page [ 9547.672296] PGD 8000000846b41067 P4D 8000000846b41067 PUD 797034067 PMD 0 [ 9547.672303] Oops: 0000 [#1] SMP PTI [ 9547.672307] CPU: 1 PID: 25634 Comm: i915_selftest Tainted: G U 5.3.0-rc8+ #73 [ 9547.672313] Hardware name: /NUC6i7KYB, BIOS KYSKLi70.86A.0050.2017.0831.1924 08/31/2017 [ 9547.672395] RIP: 0010:intel_context_unpin+0x9/0x100 [i915] [ 9547.672400] Code: 6b 60 00 e9 17 ff ff ff bd fc ff ff ff e9 7c ff ff ff 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 0f 1f 44 00 00 41 54 55 53 <8b> 47 7c 83 f8 01 74 26 8d 48 ff f0 0f b1 4f 7c 48 8d 57 7c 75 05 [ 9547.672413] RSP: 0018:ffffae8ac24ff878 EFLAGS: 00010246 [ 9547.672417] RAX: ffff944a1b7842d0 RBX: ffff944a1b784000 RCX: ffff944a12dd6fa8 [ 9547.672422] RDX: ffff944a1b7842c0 RSI: ffff944a12dd5328 RDI: 0000000000000000 [ 9547.672428] RBP: 0000000000000000 R08: ffff944a11e5d840 R09: 0000000000000000 [ 9547.672433] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 [ 9547.672438] R13: ffffffffc11aaf00 R14: 00000000ffffffe4 R15: ffff944a0e29bf38 [ 9547.672443] FS: 00007fc259b88ac0(0000) GS:ffff944a1f880000(0000) knlGS:0000000000000000 [ 9547.672449] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 9547.672454] CR2: 000000000000007c CR3: 0000000853346003 CR4: 00000000003606e0 [ 9547.672459] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 9547.672464] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 9547.672469] Call Trace: [ 9547.672518] intel_engine_cleanup_common+0xe3/0x270 [i915] [ 9547.672567] execlists_destroy+0xe/0x30 [i915] [ 9547.672669] intel_engines_init+0x94/0xf0 [i915] [ 9547.672749] i915_gem_init+0x191/0x950 [i915] Signed-off-by: Matthew Auld Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190927173409.31175-2-matthew.auld@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index f451d5076bdef..80fd072ac719d 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -820,8 +820,10 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) if (engine->default_state) i915_gem_object_put(engine->default_state); - intel_context_unpin(engine->kernel_context); - intel_context_put(engine->kernel_context); + if (engine->kernel_context) { + intel_context_unpin(engine->kernel_context); + intel_context_put(engine->kernel_context); + } GEM_BUG_ON(!llist_empty(&engine->barrier_tasks)); intel_wa_list_free(&engine->ctx_wa_list); From a3f356b273f9b860cd28be1d4ec25cd2782cda0c Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 27 Sep 2019 18:33:49 +0100 Subject: [PATCH 277/331] drm/i915: simplify i915_gem_init_early i915_gem_init_early doesn't need to return anything. Signed-off-by: Matthew Auld Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190927173409.31175-3-matthew.auld@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 5 +---- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 4 +--- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index dd9613e457239..364f7d0b7e319 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -614,9 +614,7 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv) intel_gt_init_early(&dev_priv->gt, dev_priv); - ret = i915_gem_init_early(dev_priv); - if (ret < 0) - goto err_gt; + i915_gem_init_early(dev_priv); /* This must be called before any calls to HAS_PCH_* */ intel_detect_pch(dev_priv); @@ -638,7 +636,6 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv) err_gem: i915_gem_cleanup_early(dev_priv); -err_gt: intel_gt_driver_late_release(&dev_priv->gt); vlv_free_s0ix_state(dev_priv); err_workqueues: diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c0f6c1f72b621..337d8306416a5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2243,7 +2243,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, int i915_gem_init_userptr(struct drm_i915_private *dev_priv); void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv); void i915_gem_sanitize(struct drm_i915_private *i915); -int i915_gem_init_early(struct drm_i915_private *dev_priv); +void i915_gem_init_early(struct drm_i915_private *dev_priv); void i915_gem_cleanup_early(struct drm_i915_private *dev_priv); int i915_gem_freeze(struct drm_i915_private *dev_priv); int i915_gem_freeze_late(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e2897a6662258..3d3fda4cae997 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1533,7 +1533,7 @@ static void i915_gem_init__mm(struct drm_i915_private *i915) i915_gem_init__objects(i915); } -int i915_gem_init_early(struct drm_i915_private *dev_priv) +void i915_gem_init_early(struct drm_i915_private *dev_priv) { int err; @@ -1545,8 +1545,6 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv) err = i915_gemfs_init(dev_priv); if (err) DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", err); - - return 0; } void i915_gem_cleanup_early(struct drm_i915_private *dev_priv) From 42b899fb9a3fe40e7b59e8815247a34973729d9a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 27 Sep 2019 22:06:46 +0100 Subject: [PATCH 278/331] drm/i915/selftests: Do not try to sanitize mock HW If we are mocking the device, skip trying to sanitize the pm HW state. Signed-off-by: Chris Wilson Cc: Andi Shyti Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20190927210646.29664-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 42f175d9b98c4..29fa1dabbc2e0 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -137,7 +137,8 @@ void intel_gt_sanitize(struct intel_gt *gt, bool force) void intel_gt_pm_disable(struct intel_gt *gt) { - intel_sanitize_gt_powersave(gt->i915); + if (!is_mock_gt(gt)) + intel_sanitize_gt_powersave(gt->i915); } void intel_gt_pm_fini(struct intel_gt *gt) From 260e6b712769d74304093a699796ebf134aef095 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 27 Sep 2019 22:17:47 +0100 Subject: [PATCH 279/331] drm/i915: Pass intel_gt to has-reset? As we execute GPU resets on a gt/ basis, and use the intel_gt as the primary for all other reset functions, also use it for the has-reset? predicates. Gradually simplifying the churn of pointers. Signed-off-by: Chris Wilson Cc: Andi Shyti Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20190927211749.2181-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/display/intel_display.c | 2 +- drivers/gpu/drm/i915/gt/intel_reset.c | 21 ++++++++++++------- drivers/gpu/drm/i915/gt/intel_reset.h | 5 ++--- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 12 +++++------ drivers/gpu/drm/i915/gt/selftest_lrc.c | 2 +- drivers/gpu/drm/i915/gt/selftest_reset.c | 4 ++-- .../gpu/drm/i915/gt/selftest_workarounds.c | 8 +++---- drivers/gpu/drm/i915/i915_getparam.c | 4 ++-- 8 files changed, 31 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index bbe088b9d0579..f1328c08f4adb 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -4273,7 +4273,7 @@ __intel_display_resume(struct drm_device *dev, static bool gpu_reset_clobbers_display(struct drm_i915_private *dev_priv) { return (INTEL_INFO(dev_priv)->gpu_reset_clobbers_display && - intel_has_gpu_reset(dev_priv)); + intel_has_gpu_reset(&dev_priv->gt)); } void intel_prepare_reset(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index d08226f5bea59..ea5cf3a28fbe2 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -546,8 +546,10 @@ typedef int (*reset_func)(struct intel_gt *, intel_engine_mask_t engine_mask, unsigned int retry); -static reset_func intel_get_gpu_reset(struct drm_i915_private *i915) +static reset_func intel_get_gpu_reset(const struct intel_gt *gt) { + struct drm_i915_private *i915 = gt->i915; + if (INTEL_GEN(i915) >= 8) return gen8_reset_engines; else if (INTEL_GEN(i915) >= 6) @@ -571,7 +573,7 @@ int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask) int ret = -ETIMEDOUT; int retry; - reset = intel_get_gpu_reset(gt->i915); + reset = intel_get_gpu_reset(gt); if (!reset) return -ENODEV; @@ -591,17 +593,20 @@ int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask) return ret; } -bool intel_has_gpu_reset(struct drm_i915_private *i915) +bool intel_has_gpu_reset(const struct intel_gt *gt) { if (!i915_modparams.reset) return NULL; - return intel_get_gpu_reset(i915); + return intel_get_gpu_reset(gt); } -bool intel_has_reset_engine(struct drm_i915_private *i915) +bool intel_has_reset_engine(const struct intel_gt *gt) { - return INTEL_INFO(i915)->has_reset_engine && i915_modparams.reset >= 2; + if (i915_modparams.reset < 2) + return false; + + return INTEL_INFO(gt->i915)->has_reset_engine; } int intel_reset_guc(struct intel_gt *gt) @@ -958,7 +963,7 @@ void intel_gt_reset(struct intel_gt *gt, awake = reset_prepare(gt); - if (!intel_has_gpu_reset(gt->i915)) { + if (!intel_has_gpu_reset(gt)) { if (i915_modparams.reset) dev_err(gt->i915->drm.dev, "GPU reset not supported\n"); else @@ -1179,7 +1184,7 @@ void intel_gt_handle_error(struct intel_gt *gt, * Try engine reset when available. We fall back to full reset if * single reset fails. */ - if (intel_has_reset_engine(gt->i915) && !intel_gt_is_wedged(gt)) { + if (intel_has_reset_engine(gt) && !intel_gt_is_wedged(gt)) { for_each_engine_masked(engine, gt->i915, engine_mask, tmp) { BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE); if (test_and_set_bit(I915_RESET_ENGINE + engine->id, diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h index 0b6ff1ee7f063..8e8d5f7611665 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.h +++ b/drivers/gpu/drm/i915/gt/intel_reset.h @@ -14,7 +14,6 @@ #include "intel_engine_types.h" #include "intel_reset_types.h" -struct drm_i915_private; struct i915_request; struct intel_engine_cs; struct intel_gt; @@ -80,7 +79,7 @@ static inline bool __intel_reset_failed(const struct intel_reset *reset) return unlikely(test_bit(I915_WEDGED, &reset->flags)); } -bool intel_has_gpu_reset(struct drm_i915_private *i915); -bool intel_has_reset_engine(struct drm_i915_private *i915); +bool intel_has_gpu_reset(const struct intel_gt *gt); +bool intel_has_reset_engine(const struct intel_gt *gt); #endif /* I915_RESET_H */ diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index a0098fc35921c..9c0c8441c22a5 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -458,7 +458,7 @@ static int igt_reset_nop_engine(void *arg) /* Check that we can engine-reset during non-user portions */ - if (!intel_has_reset_engine(gt->i915)) + if (!intel_has_reset_engine(gt)) return 0; file = mock_file(gt->i915); @@ -559,7 +559,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) /* Check that we can issue an engine reset on an idle engine (no-op) */ - if (!intel_has_reset_engine(gt->i915)) + if (!intel_has_reset_engine(gt)) return 0; if (active) { @@ -791,7 +791,7 @@ static int __igt_reset_engines(struct intel_gt *gt, * with any other engine. */ - if (!intel_has_reset_engine(gt->i915)) + if (!intel_has_reset_engine(gt)) return 0; if (flags & TEST_ACTIVE) { @@ -1547,7 +1547,7 @@ static int igt_handle_error(void *arg) /* Check that we can issue a global GPU and engine reset */ - if (!intel_has_reset_engine(gt->i915)) + if (!intel_has_reset_engine(gt)) return 0; if (!engine || !intel_engine_can_store_dword(engine)) @@ -1689,7 +1689,7 @@ static int igt_reset_engines_atomic(void *arg) /* Check that the engines resets are usable from atomic context */ - if (!intel_has_reset_engine(gt->i915)) + if (!intel_has_reset_engine(gt)) return 0; if (USES_GUC_SUBMISSION(gt->i915)) @@ -1746,7 +1746,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) bool saved_hangcheck; int err; - if (!intel_has_gpu_reset(gt->i915)) + if (!intel_has_gpu_reset(gt)) return 0; if (intel_gt_is_wedged(gt)) diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 22ea2e7470643..93f2fcdc49bf8 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -1310,7 +1310,7 @@ static int live_preempt_hang(void *arg) if (!HAS_LOGICAL_RING_PREEMPTION(i915)) return 0; - if (!intel_has_reset_engine(i915)) + if (!intel_has_reset_engine(&i915->gt)) return 0; mutex_lock(&i915->drm.struct_mutex); diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index 00a4f60cdfd5f..d79482db7fe88 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -112,7 +112,7 @@ static int igt_atomic_engine_reset(void *arg) /* Check that the resets are usable from atomic context */ - if (!intel_has_reset_engine(gt->i915)) + if (!intel_has_reset_engine(gt)) return 0; if (USES_GUC_SUBMISSION(gt->i915)) @@ -170,7 +170,7 @@ int intel_reset_live_selftests(struct drm_i915_private *i915) }; struct intel_gt *gt = &i915->gt; - if (!intel_has_gpu_reset(gt->i915)) + if (!intel_has_gpu_reset(gt)) return 0; if (intel_gt_is_wedged(gt)) diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index 999a98f00494f..d40ce0709bff4 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -747,7 +747,7 @@ static int live_reset_whitelist(void *arg) igt_global_reset_lock(&i915->gt); - if (intel_has_reset_engine(i915)) { + if (intel_has_reset_engine(&i915->gt)) { err = check_whitelist_across_reset(engine, do_engine_reset, "engine"); @@ -755,7 +755,7 @@ static int live_reset_whitelist(void *arg) goto out; } - if (intel_has_gpu_reset(i915)) { + if (intel_has_gpu_reset(&i915->gt)) { err = check_whitelist_across_reset(engine, do_device_reset, "device"); @@ -1131,7 +1131,7 @@ live_gpu_reset_workarounds(void *arg) struct wa_lists lists; bool ok; - if (!intel_has_gpu_reset(i915)) + if (!intel_has_gpu_reset(&i915->gt)) return 0; ctx = kernel_context(i915); @@ -1178,7 +1178,7 @@ live_engine_reset_workarounds(void *arg) struct wa_lists lists; int ret = 0; - if (!intel_has_reset_engine(i915)) + if (!intel_has_reset_engine(&i915->gt)) return 0; ctx = kernel_context(i915); diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c index 5d9101376a3d2..f4b3cbb1adce7 100644 --- a/drivers/gpu/drm/i915/i915_getparam.c +++ b/drivers/gpu/drm/i915/i915_getparam.c @@ -79,8 +79,8 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, break; case I915_PARAM_HAS_GPU_RESET: value = i915_modparams.enable_hangcheck && - intel_has_gpu_reset(i915); - if (value && intel_has_reset_engine(i915)) + intel_has_gpu_reset(&i915->gt); + if (value && intel_has_reset_engine(&i915->gt)) value = 2; break; case I915_PARAM_HAS_RESOURCE_STREAMER: From 4e18ca703f2167d8bcb7f736271744131e3c390b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 27 Sep 2019 22:17:48 +0100 Subject: [PATCH 280/331] drm/i915/selftests: Distinguish mock device from no wakeref On systems that have no runtime-pm, we mark the wakeref as being -1. We therefore cannot use that value for the mock-gt indicator, so opt for -ENODEV instead. The wakeref should never be an error value -- one hopes! Signed-off-by: Chris Wilson Cc: Andi Shyti Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20190927211749.2181-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_gt_pm.h | 2 +- drivers/gpu/drm/i915/selftests/mock_gem_device.c | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index ab794e8533561..997770d3a9682 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -57,7 +57,7 @@ int intel_gt_runtime_resume(struct intel_gt *gt); static inline bool is_mock_gt(const struct intel_gt *gt) { - return I915_SELFTEST_ONLY(gt->awake == -1); + return I915_SELFTEST_ONLY(gt->awake == -ENODEV); } #endif /* INTEL_GT_PM_H */ diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 91f15fa728cd0..2448067822aff 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -182,6 +182,7 @@ struct drm_i915_private *mock_gem_device(void) i915_gem_init__mm(i915); intel_gt_init_early(&i915->gt, i915); atomic_inc(&i915->gt.wakeref.count); /* disable; no hw support */ + i915->gt.awake = -ENODEV; i915->wq = alloc_ordered_workqueue("mock", 0); if (!i915->wq) @@ -192,8 +193,6 @@ struct drm_i915_private *mock_gem_device(void) INIT_DELAYED_WORK(&i915->gem.retire_work, mock_retire_work_handler); INIT_WORK(&i915->gem.idle_work, mock_idle_work_handler); - i915->gt.awake = -1; - intel_timelines_init(i915); mutex_lock(&i915->drm.struct_mutex); From 4abc6e7c917b1929d8fad2c2364b270cae5ed4ca Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 27 Sep 2019 22:17:49 +0100 Subject: [PATCH 281/331] drm/i915/selftests: Provide a mock GPU reset routine For those mock tests that may wish to pretend triggering a GPU reset and processing the cleanup. Signed-off-by: Chris Wilson Cc: Andi Shyti Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20190927211749.2181-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_reset.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index ea5cf3a28fbe2..76938fa3a1b9f 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -542,6 +542,13 @@ static int gen8_reset_engines(struct intel_gt *gt, return ret; } +static int mock_reset(struct intel_gt *gt, + intel_engine_mask_t mask, + unsigned int retry) +{ + return 0; +} + typedef int (*reset_func)(struct intel_gt *, intel_engine_mask_t engine_mask, unsigned int retry); @@ -550,7 +557,9 @@ static reset_func intel_get_gpu_reset(const struct intel_gt *gt) { struct drm_i915_private *i915 = gt->i915; - if (INTEL_GEN(i915) >= 8) + if (is_mock_gt(gt)) + return mock_reset; + else if (INTEL_GEN(i915) >= 8) return gen8_reset_engines; else if (INTEL_GEN(i915) >= 6) return gen6_reset_engines; From 50d16d44cce45f172bc4b27dcd353b1107aa7429 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 30 Sep 2019 15:49:19 +0100 Subject: [PATCH 282/331] drm/i915/selftests: Exercise context switching in parallel We currently test context switching on each engine as a basic stress test (just verifying that nothing explodes if we execute 2 requests from different contexts sequentially). What we have not tested is what happens if we try and do so on all available engines simultaneously, putting our SW and the HW under the maximal stress. v2: Clone the set of engines from the first context into the secondary contexts. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190930144919.27992-1-chris@chris-wilson.co.uk --- .../drm/i915/gem/selftests/i915_gem_context.c | 225 ++++++++++++++++++ 1 file changed, 225 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index dc25bcc3e3723..0f4d0644a4800 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -156,6 +156,230 @@ static int live_nop_switch(void *arg) return err; } +struct parallel_switch { + struct task_struct *tsk; + struct intel_context *ce[2]; +}; + +static int __live_parallel_switch1(void *data) +{ + struct parallel_switch *arg = data; + struct drm_i915_private *i915 = arg->ce[0]->engine->i915; + IGT_TIMEOUT(end_time); + unsigned long count; + + count = 0; + do { + struct i915_request *rq = NULL; + int err, n; + + for (n = 0; n < ARRAY_SIZE(arg->ce); n++) { + i915_request_put(rq); + + mutex_lock(&i915->drm.struct_mutex); + rq = i915_request_create(arg->ce[n]); + if (IS_ERR(rq)) { + mutex_unlock(&i915->drm.struct_mutex); + return PTR_ERR(rq); + } + + i915_request_get(rq); + i915_request_add(rq); + mutex_unlock(&i915->drm.struct_mutex); + } + + err = 0; + if (i915_request_wait(rq, 0, HZ / 5) < 0) + err = -ETIME; + i915_request_put(rq); + if (err) + return err; + + count++; + } while (!__igt_timeout(end_time, NULL)); + + pr_info("%s: %lu switches (sync)\n", arg->ce[0]->engine->name, count); + return 0; +} + +static int __live_parallel_switchN(void *data) +{ + struct parallel_switch *arg = data; + struct drm_i915_private *i915 = arg->ce[0]->engine->i915; + IGT_TIMEOUT(end_time); + unsigned long count; + int n; + + count = 0; + do { + for (n = 0; n < ARRAY_SIZE(arg->ce); n++) { + struct i915_request *rq; + + mutex_lock(&i915->drm.struct_mutex); + rq = i915_request_create(arg->ce[n]); + if (IS_ERR(rq)) { + mutex_unlock(&i915->drm.struct_mutex); + return PTR_ERR(rq); + } + + i915_request_add(rq); + mutex_unlock(&i915->drm.struct_mutex); + } + + count++; + } while (!__igt_timeout(end_time, NULL)); + + pr_info("%s: %lu switches (many)\n", arg->ce[0]->engine->name, count); + return 0; +} + +static int live_parallel_switch(void *arg) +{ + struct drm_i915_private *i915 = arg; + static int (* const func[])(void *arg) = { + __live_parallel_switch1, + __live_parallel_switchN, + NULL, + }; + struct parallel_switch *data = NULL; + struct i915_gem_engines *engines; + struct i915_gem_engines_iter it; + int (* const *fn)(void *arg); + struct i915_gem_context *ctx; + struct intel_context *ce; + struct drm_file *file; + int n, m, count; + int err = 0; + + /* + * Check we can process switches on all engines simultaneously. + */ + + if (!DRIVER_CAPS(i915)->has_logical_contexts) + return 0; + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + mutex_lock(&i915->drm.struct_mutex); + + ctx = live_context(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out_locked; + } + + engines = i915_gem_context_lock_engines(ctx); + count = engines->num_engines; + + data = kcalloc(count, sizeof(*data), GFP_KERNEL); + if (!data) { + i915_gem_context_unlock_engines(ctx); + err = -ENOMEM; + goto out_locked; + } + + m = 0; /* Use the first context as our template for the engines */ + for_each_gem_engine(ce, engines, it) { + err = intel_context_pin(ce); + if (err) { + i915_gem_context_unlock_engines(ctx); + goto out_locked; + } + data[m++].ce[0] = intel_context_get(ce); + } + i915_gem_context_unlock_engines(ctx); + + /* Clone the same set of engines into the other contexts */ + for (n = 1; n < ARRAY_SIZE(data->ce); n++) { + ctx = live_context(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out_locked; + } + + for (m = 0; m < count; m++) { + if (!data[m].ce[0]) + continue; + + ce = intel_context_create(ctx, data[m].ce[0]->engine); + if (IS_ERR(ce)) + goto out_locked; + + err = intel_context_pin(ce); + if (err) { + intel_context_put(ce); + goto out_locked; + } + + data[m].ce[n] = ce; + } + } + + mutex_unlock(&i915->drm.struct_mutex); + + for (fn = func; !err && *fn; fn++) { + struct igt_live_test t; + int n; + + mutex_lock(&i915->drm.struct_mutex); + err = igt_live_test_begin(&t, i915, __func__, ""); + mutex_unlock(&i915->drm.struct_mutex); + if (err) + break; + + for (n = 0; n < count; n++) { + if (!data[n].ce[0]) + continue; + + data[n].tsk = kthread_run(*fn, &data[n], + "igt/parallel:%s", + data[n].ce[0]->engine->name); + if (IS_ERR(data[n].tsk)) { + err = PTR_ERR(data[n].tsk); + break; + } + get_task_struct(data[n].tsk); + } + + for (n = 0; n < count; n++) { + int status; + + if (IS_ERR_OR_NULL(data[n].tsk)) + continue; + + status = kthread_stop(data[n].tsk); + if (status && !err) + err = status; + + put_task_struct(data[n].tsk); + data[n].tsk = NULL; + } + + mutex_lock(&i915->drm.struct_mutex); + if (igt_live_test_end(&t)) + err = -EIO; + mutex_unlock(&i915->drm.struct_mutex); + } + + mutex_lock(&i915->drm.struct_mutex); +out_locked: + for (n = 0; n < count; n++) { + for (m = 0; m < ARRAY_SIZE(data->ce); m++) { + if (!data[n].ce[m]) + continue; + + intel_context_unpin(data[n].ce[m]); + intel_context_put(data[n].ce[m]); + } + } + mutex_unlock(&i915->drm.struct_mutex); + kfree(data); + mock_file_free(i915, file); + return err; +} + static unsigned long real_page_count(struct drm_i915_gem_object *obj) { return huge_gem_object_phys_size(obj) >> PAGE_SHIFT; @@ -1681,6 +1905,7 @@ int i915_gem_context_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(live_nop_switch), + SUBTEST(live_parallel_switch), SUBTEST(igt_ctx_exec), SUBTEST(igt_ctx_readonly), SUBTEST(igt_ctx_sseu), From 1d6f1d16d3a36351a6718378790497f1ba3b5378 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 27 Sep 2019 17:03:35 +0100 Subject: [PATCH 283/331] drm/i915/gt: Only unwedge if we can reset first MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unwedging the GPU requires a successful GPU reset before we restore the default submission, or else we may see residual context switch events that we were not expecting. v2: Pull in the special-case reset_clobbers_display, and explain why it should be safe in the context of unwedging. v3: Just forget all about resets before unwedging if it will clobber the display; risk it all. Reported-by: Janusz Krzysztofik Signed-off-by: Chris Wilson Cc: Janusz Krzysztofik Cc: Daniele Ceraolo Spurio Cc: Ville Syrjälä Reviewed-by: Daniele Ceraolo Spurio #v1 Link: https://patchwork.freedesktop.org/patch/msgid/20190927160335.10622-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_reset.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 76938fa3a1b9f..e189897e87975 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -821,6 +821,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) struct intel_gt_timelines *timelines = >->timelines; struct intel_timeline *tl; unsigned long flags; + bool ok; if (!test_bit(I915_WEDGED, >->reset.flags)) return true; @@ -867,7 +868,12 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) } spin_unlock_irqrestore(&timelines->lock, flags); - intel_gt_sanitize(gt, false); + /* We must reset pending GPU events before restoring our submission */ + ok = !HAS_EXECLISTS(gt->i915); /* XXX better agnosticism desired */ + if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display) + ok = __intel_gt_reset(gt, ALL_ENGINES) == 0; + if (!ok) + return false; /* * Undo nop_submit_request. We prevent all new i915 requests from From f8db4d051b5eb8d6272b67d76cf79971d117a728 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 1 Oct 2019 11:35:18 +0100 Subject: [PATCH 284/331] drm/i915: Initialise breadcrumb lists on the virtual engine With deferring the breadcrumb signalling to the virtual engine (thanks preempt-to-busy) we need to make sure the lists and irq-worker are ready to send a signal. [41958.710544] BUG: kernel NULL pointer dereference, address: 0000000000000000 [41958.710553] #PF: supervisor write access in kernel mode [41958.710556] #PF: error_code(0x0002) - not-present page [41958.710558] PGD 0 P4D 0 [41958.710562] Oops: 0002 [#1] SMP [41958.710565] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G U 5.3.0+ #207 [41958.710568] Hardware name: Intel Corporation NUC7i5BNK/NUC7i5BNB, BIOS BNKBL357.86A.0052.2017.0918.1346 09/18/2017 [41958.710602] RIP: 0010:i915_request_enable_breadcrumb+0xe1/0x130 [i915] [41958.710605] Code: 8b 44 24 30 48 89 41 08 48 89 08 48 8b 85 98 01 00 00 48 8d 8d 90 01 00 00 48 89 95 98 01 00 00 49 89 4c 24 28 49 89 44 24 30 <48> 89 10 f0 80 4b 30 10 c6 85 88 01 00 00 00 e9 1a ff ff ff 48 83 [41958.710609] RSP: 0018:ffffc90000003de0 EFLAGS: 00010046 [41958.710612] RAX: 0000000000000000 RBX: ffff888735424480 RCX: ffff8887cddb2190 [41958.710614] RDX: ffff8887cddb3570 RSI: ffff888850362190 RDI: ffff8887cddb2188 [41958.710617] RBP: ffff8887cddb2000 R08: ffff8888503624a8 R09: 0000000000000100 [41958.710619] R10: 0000000000000001 R11: 0000000000000000 R12: ffff8887cddb3548 [41958.710622] R13: 0000000000000000 R14: 0000000000000046 R15: ffff888850362070 [41958.710625] FS: 0000000000000000(0000) GS:ffff88885ea00000(0000) knlGS:0000000000000000 [41958.710628] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [41958.710630] CR2: 0000000000000000 CR3: 0000000002c09002 CR4: 00000000001606f0 [41958.710633] Call Trace: [41958.710636] [41958.710668] __i915_request_submit+0x12b/0x160 [i915] [41958.710693] virtual_submit_request+0x67/0x120 [i915] [41958.710720] __unwind_incomplete_requests+0x131/0x170 [i915] [41958.710744] execlists_dequeue+0xb40/0xe00 [i915] [41958.710771] execlists_submission_tasklet+0x10f/0x150 [i915] [41958.710776] tasklet_action_common.isra.17+0x41/0xa0 [41958.710781] __do_softirq+0xc8/0x221 [41958.710785] irq_exit+0xa6/0xb0 [41958.710788] smp_apic_timer_interrupt+0x4d/0x80 [41958.710791] apic_timer_interrupt+0xf/0x20 [41958.710794] Fixes: cb2377a919bb ("drm/i915: Fixup preempt-to-busy vs reset of a virtual request") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191001103518.9113-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index fa385218ce925..9877b6ba13df8 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -4175,6 +4175,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx, snprintf(ve->base.name, sizeof(ve->base.name), "virtual"); intel_engine_init_active(&ve->base, ENGINE_VIRTUAL); + intel_engine_init_breadcrumbs(&ve->base); intel_engine_init_execlists(&ve->base); From 99785b86eead0934dffc1b36f3e5820d0c87e69f Mon Sep 17 00:00:00 2001 From: Srinivasan S Date: Wed, 25 Sep 2019 06:05:42 +0530 Subject: [PATCH 285/331] drm/i915/dp: Fix DP MST error after unplugging TypeC cable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch avoids DP MST payload error message in dmesg, as it is trying to update the payload to the disconnected DP MST device. After DP MST device is disconnected we should not be updating the payload and hence remove the error. v2: Removed the connector status check and converted from error to debug. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111632 Signed-off-by: Srinivasan S Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/1569371742-109402-1-git-send-email-srinivasan.s@intel.com --- drivers/gpu/drm/i915/display/intel_dp_mst.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index cf4d851a51395..df4f35c10a693 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -215,7 +215,7 @@ static void intel_mst_disable_dp(struct intel_encoder *encoder, ret = drm_dp_update_payload_part1(&intel_dp->mst_mgr); if (ret) { - DRM_ERROR("failed to update payload %d\n", ret); + DRM_DEBUG_KMS("failed to update payload %d\n", ret); } if (old_crtc_state->has_audio) intel_audio_codec_disable(encoder, From a4311745bba9763e3c965643d4531bd5765b0513 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 28 Sep 2019 09:25:46 +0100 Subject: [PATCH 286/331] drm/i915/userptr: Never allow userptr into the mappable GGTT Daniel Vetter uncovered a nasty cycle in using the mmu-notifiers to invalidate userptr objects which also happen to be pulled into GGTT mmaps. That is when we unbind the userptr object (on mmu invalidation), we revoke all CPU mmaps, which may then recurse into mmu invalidation. We looked for ways of breaking the cycle, but the revocation on invalidation is required and cannot be avoided. The only solution we could see was to not allow such GGTT bindings of userptr objects in the first place. In practice, no one really wants to use a GGTT mmapping of a CPU pointer... Just before Daniel's explosive lockdep patches land in v5.4-rc1, we got a genuine blip from CI: <4>[ 246.793958] ====================================================== <4>[ 246.793972] WARNING: possible circular locking dependency detected <4>[ 246.793989] 5.3.0-gbd6c56f50d15-drmtip_372+ #1 Tainted: G U <4>[ 246.794003] ------------------------------------------------------ <4>[ 246.794017] kswapd0/145 is trying to acquire lock: <4>[ 246.794030] 000000003f565be6 (&dev->struct_mutex/1){+.+.}, at: userptr_mn_invalidate_range_start+0x18f/0x220 [i915] <4>[ 246.794250] but task is already holding lock: <4>[ 246.794263] 000000001799cef9 (&anon_vma->rwsem){++++}, at: page_lock_anon_vma_read+0xe6/0x2a0 <4>[ 246.794291] which lock already depends on the new lock. <4>[ 246.794307] the existing dependency chain (in reverse order) is: <4>[ 246.794322] -> #3 (&anon_vma->rwsem){++++}: <4>[ 246.794344] down_write+0x33/0x70 <4>[ 246.794357] __vma_adjust+0x3d9/0x7b0 <4>[ 246.794370] __split_vma+0x16a/0x180 <4>[ 246.794385] mprotect_fixup+0x2a5/0x320 <4>[ 246.794399] do_mprotect_pkey+0x208/0x2e0 <4>[ 246.794413] __x64_sys_mprotect+0x16/0x20 <4>[ 246.794429] do_syscall_64+0x55/0x1c0 <4>[ 246.794443] entry_SYSCALL_64_after_hwframe+0x49/0xbe <4>[ 246.794456] -> #2 (&mapping->i_mmap_rwsem){++++}: <4>[ 246.794478] down_write+0x33/0x70 <4>[ 246.794493] unmap_mapping_pages+0x48/0x130 <4>[ 246.794519] i915_vma_revoke_mmap+0x81/0x1b0 [i915] <4>[ 246.794519] i915_vma_unbind+0x11d/0x4a0 [i915] <4>[ 246.794519] i915_vma_destroy+0x31/0x300 [i915] <4>[ 246.794519] __i915_gem_free_objects+0xb8/0x4b0 [i915] <4>[ 246.794519] drm_file_free.part.0+0x1e6/0x290 <4>[ 246.794519] drm_release+0xa6/0xe0 <4>[ 246.794519] __fput+0xc2/0x250 <4>[ 246.794519] task_work_run+0x82/0xb0 <4>[ 246.794519] do_exit+0x35b/0xdb0 <4>[ 246.794519] do_group_exit+0x34/0xb0 <4>[ 246.794519] __x64_sys_exit_group+0xf/0x10 <4>[ 246.794519] do_syscall_64+0x55/0x1c0 <4>[ 246.794519] entry_SYSCALL_64_after_hwframe+0x49/0xbe <4>[ 246.794519] -> #1 (&vm->mutex){+.+.}: <4>[ 246.794519] i915_gem_shrinker_taints_mutex+0x6d/0xe0 [i915] <4>[ 246.794519] i915_address_space_init+0x9f/0x160 [i915] <4>[ 246.794519] i915_ggtt_init_hw+0x55/0x170 [i915] <4>[ 246.794519] i915_driver_probe+0xc9f/0x1620 [i915] <4>[ 246.794519] i915_pci_probe+0x43/0x1b0 [i915] <4>[ 246.794519] pci_device_probe+0x9e/0x120 <4>[ 246.794519] really_probe+0xea/0x3d0 <4>[ 246.794519] driver_probe_device+0x10b/0x120 <4>[ 246.794519] device_driver_attach+0x4a/0x50 <4>[ 246.794519] __driver_attach+0x97/0x130 <4>[ 246.794519] bus_for_each_dev+0x74/0xc0 <4>[ 246.794519] bus_add_driver+0x13f/0x210 <4>[ 246.794519] driver_register+0x56/0xe0 <4>[ 246.794519] do_one_initcall+0x58/0x300 <4>[ 246.794519] do_init_module+0x56/0x1f6 <4>[ 246.794519] load_module+0x25bd/0x2a40 <4>[ 246.794519] __se_sys_finit_module+0xd3/0xf0 <4>[ 246.794519] do_syscall_64+0x55/0x1c0 <4>[ 246.794519] entry_SYSCALL_64_after_hwframe+0x49/0xbe <4>[ 246.794519] -> #0 (&dev->struct_mutex/1){+.+.}: <4>[ 246.794519] __lock_acquire+0x15d8/0x1e90 <4>[ 246.794519] lock_acquire+0xa6/0x1c0 <4>[ 246.794519] __mutex_lock+0x9d/0x9b0 <4>[ 246.794519] userptr_mn_invalidate_range_start+0x18f/0x220 [i915] <4>[ 246.794519] __mmu_notifier_invalidate_range_start+0x85/0x110 <4>[ 246.794519] try_to_unmap_one+0x76b/0x860 <4>[ 246.794519] rmap_walk_anon+0x104/0x280 <4>[ 246.794519] try_to_unmap+0xc0/0xf0 <4>[ 246.794519] shrink_page_list+0x561/0xc10 <4>[ 246.794519] shrink_inactive_list+0x220/0x440 <4>[ 246.794519] shrink_node_memcg+0x36e/0x740 <4>[ 246.794519] shrink_node+0xcb/0x490 <4>[ 246.794519] balance_pgdat+0x241/0x580 <4>[ 246.794519] kswapd+0x16c/0x530 <4>[ 246.794519] kthread+0x119/0x130 <4>[ 246.794519] ret_from_fork+0x24/0x50 <4>[ 246.794519] other info that might help us debug this: <4>[ 246.794519] Chain exists of: &dev->struct_mutex/1 --> &mapping->i_mmap_rwsem --> &anon_vma->rwsem <4>[ 246.794519] Possible unsafe locking scenario: <4>[ 246.794519] CPU0 CPU1 <4>[ 246.794519] ---- ---- <4>[ 246.794519] lock(&anon_vma->rwsem); <4>[ 246.794519] lock(&mapping->i_mmap_rwsem); <4>[ 246.794519] lock(&anon_vma->rwsem); <4>[ 246.794519] lock(&dev->struct_mutex/1); <4>[ 246.794519] *** DEADLOCK *** v2: Say no to mmap_ioctl Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111744 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111870 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Daniel Vetter Cc: stable@vger.kernel.org Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190928082546.3473-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 7 +++++++ drivers/gpu/drm/i915/gem/i915_gem_object.h | 6 ++++++ drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 3 ++- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 1 + drivers/gpu/drm/i915/i915_gem.c | 3 +++ 5 files changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 860b751c51f19..dd0c2840ba4d5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -343,6 +343,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) /* fallthrough */ case -EIO: /* shmemfs failure from swap device */ case -EFAULT: /* purged object */ + case -ENODEV: /* bad object, how did you get here! */ return VM_FAULT_SIGBUS; case -ENOSPC: /* shmemfs allocation failure */ @@ -466,10 +467,16 @@ i915_gem_mmap_gtt(struct drm_file *file, if (!obj) return -ENOENT; + if (i915_gem_object_never_bind_ggtt(obj)) { + ret = -ENODEV; + goto out; + } + ret = create_mmap_offset(obj); if (ret == 0) *offset = drm_vma_node_offset_addr(&obj->base.vma_node); +out: i915_gem_object_put(obj); return ret; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 29b9eddc4c7f5..aec05f967d9d7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -152,6 +152,12 @@ i915_gem_object_is_proxy(const struct drm_i915_gem_object *obj) return obj->ops->flags & I915_GEM_OBJECT_IS_PROXY; } +static inline bool +i915_gem_object_never_bind_ggtt(const struct drm_i915_gem_object *obj) +{ + return obj->ops->flags & I915_GEM_OBJECT_NO_GGTT; +} + static inline bool i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index d695f187b7905..e1aab2fd1cd9a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -32,7 +32,8 @@ struct drm_i915_gem_object_ops { #define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0) #define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1) #define I915_GEM_OBJECT_IS_PROXY BIT(2) -#define I915_GEM_OBJECT_ASYNC_CANCEL BIT(3) +#define I915_GEM_OBJECT_NO_GGTT BIT(3) +#define I915_GEM_OBJECT_ASYNC_CANCEL BIT(4) /* Interface between the GEM object and its backing storage. * get_pages() is called once prior to the use of the associated set diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 74da35611d7c0..a0d974ac78a73 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -710,6 +710,7 @@ i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj) static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = { .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | I915_GEM_OBJECT_IS_SHRINKABLE | + I915_GEM_OBJECT_NO_GGTT | I915_GEM_OBJECT_ASYNC_CANCEL, .get_pages = i915_gem_userptr_get_pages, .put_pages = i915_gem_userptr_put_pages, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3d3fda4cae997..1426e506700d1 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -970,6 +970,9 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, lockdep_assert_held(&obj->base.dev->struct_mutex); + if (i915_gem_object_never_bind_ggtt(obj)) + return ERR_PTR(-ENODEV); + if (flags & PIN_MAPPABLE && (!view || view->type == I915_GGTT_VIEW_NORMAL)) { /* If the required space is larger than the available From 4fb8783165b7c6fb4b52428dbb80da8554e5fd1e Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 1 Oct 2019 18:25:06 +0300 Subject: [PATCH 287/331] drm/i915/display: abstract all vgaarb access to intel_vga.[ch] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split out the code related to vga client and vgaarb all over the place into new intel_vga.[ch]. No functional changes. Cc: Ville Syrjälä Cc: Chris Wilson Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20191001152506.7854-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/Makefile | 3 +- drivers/gpu/drm/i915/display/intel_display.c | 97 +---------- drivers/gpu/drm/i915/display/intel_display.h | 3 - .../drm/i915/display/intel_display_power.c | 24 +-- drivers/gpu/drm/i915/display/intel_vga.c | 160 ++++++++++++++++++ drivers/gpu/drm/i915/display/intel_vga.h | 18 ++ drivers/gpu/drm/i915/i915_drv.c | 30 +--- drivers/gpu/drm/i915/i915_pci.c | 1 - drivers/gpu/drm/i915/i915_suspend.c | 3 +- drivers/gpu/drm/i915/intel_runtime_pm.c | 1 - 10 files changed, 194 insertions(+), 146 deletions(-) create mode 100644 drivers/gpu/drm/i915/display/intel_vga.c create mode 100644 drivers/gpu/drm/i915/display/intel_vga.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 8d407ff7d59dc..b020b024286d0 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -184,7 +184,8 @@ i915-y += \ display/intel_psr.o \ display/intel_quirks.o \ display/intel_sprite.o \ - display/intel_tc.o + display/intel_tc.o \ + display/intel_vga.o i915-$(CONFIG_ACPI) += \ display/intel_acpi.o \ display/intel_opregion.o diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index f1328c08f4adb..d99c59e97568e 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include @@ -79,6 +78,7 @@ #include "intel_sideband.h" #include "intel_sprite.h" #include "intel_tc.h" +#include "intel_vga.h" /* Primary plane formats for gen <= 3 */ static const u32 i8xx_primary_formats[] = { @@ -4241,7 +4241,7 @@ __intel_display_resume(struct drm_device *dev, int i, ret; intel_modeset_setup_hw_state(dev, ctx); - i915_redisable_vga(to_i915(dev)); + intel_vga_redisable(to_i915(dev)); if (!state) return 0; @@ -15994,35 +15994,6 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) } -static i915_reg_t i915_vgacntrl_reg(struct drm_i915_private *dev_priv) -{ - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - return VLV_VGACNTRL; - else if (INTEL_GEN(dev_priv) >= 5) - return CPU_VGACNTRL; - else - return VGACNTRL; -} - -/* Disable the VGA plane that we never use */ -static void i915_disable_vga(struct drm_i915_private *dev_priv) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - u8 sr1; - i915_reg_t vga_reg = i915_vgacntrl_reg(dev_priv); - - /* WaEnableVGAAccessThroughIOPort:ctg,elk,ilk,snb,ivb,vlv,hsw */ - vga_get_uninterruptible(pdev, VGA_RSRC_LEGACY_IO); - outb(SR01, VGA_SR_INDEX); - sr1 = inb(VGA_SR_DATA); - outb(sr1 | 1<<5, VGA_SR_DATA); - vga_put(pdev, VGA_RSRC_LEGACY_IO); - udelay(300); - - I915_WRITE(vga_reg, VGA_DISP_DISABLE); - POSTING_READ(vga_reg); -} - void intel_modeset_init_hw(struct drm_i915_private *i915) { intel_update_cdclk(i915); @@ -16288,7 +16259,7 @@ int intel_modeset_init(struct drm_i915_private *i915) intel_update_max_cdclk(i915); /* Just disable it once at startup */ - i915_disable_vga(i915); + intel_vga_disable(i915); intel_setup_outputs(i915); drm_modeset_lock_all(dev); @@ -16647,39 +16618,6 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder) icl_sanitize_encoder_pll_mapping(encoder); } -void i915_redisable_vga_power_on(struct drm_i915_private *dev_priv) -{ - i915_reg_t vga_reg = i915_vgacntrl_reg(dev_priv); - - if (!(I915_READ(vga_reg) & VGA_DISP_DISABLE)) { - DRM_DEBUG_KMS("Something enabled VGA plane, disabling it\n"); - i915_disable_vga(dev_priv); - } -} - -void i915_redisable_vga(struct drm_i915_private *dev_priv) -{ - intel_wakeref_t wakeref; - - /* - * This function can be called both from intel_modeset_setup_hw_state or - * at a very early point in our resume sequence, where the power well - * structures are not yet restored. Since this function is at a very - * paranoid "someone might have enabled VGA while we were not looking" - * level, just check if the power well is enabled instead of trying to - * follow the "don't touch the power well if we don't need it" policy - * the rest of the driver uses. - */ - wakeref = intel_display_power_get_if_enabled(dev_priv, - POWER_DOMAIN_VGA); - if (!wakeref) - return; - - i915_redisable_vga_power_on(dev_priv); - - intel_display_power_put(dev_priv, POWER_DOMAIN_VGA, wakeref); -} - /* FIXME read out full plane state for all planes */ static void readout_plane_state(struct drm_i915_private *dev_priv) { @@ -17188,35 +17126,6 @@ void intel_modeset_driver_remove(struct drm_i915_private *i915) intel_fbc_cleanup_cfb(i915); } -/* - * set vga decode state - true == enable VGA decode - */ -int intel_modeset_vga_set_state(struct drm_i915_private *dev_priv, bool state) -{ - unsigned reg = INTEL_GEN(dev_priv) >= 6 ? SNB_GMCH_CTRL : INTEL_GMCH_CTRL; - u16 gmch_ctrl; - - if (pci_read_config_word(dev_priv->bridge_dev, reg, &gmch_ctrl)) { - DRM_ERROR("failed to read control word\n"); - return -EIO; - } - - if (!!(gmch_ctrl & INTEL_GMCH_VGA_DISABLE) == !state) - return 0; - - if (state) - gmch_ctrl &= ~INTEL_GMCH_VGA_DISABLE; - else - gmch_ctrl |= INTEL_GMCH_VGA_DISABLE; - - if (pci_write_config_word(dev_priv->bridge_dev, reg, gmch_ctrl)) { - DRM_ERROR("failed to write control word\n"); - return -EIO; - } - - return 0; -} - #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) struct intel_display_error_state { diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index 4b9e18e5a263d..2782f23ee8879 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -579,10 +579,7 @@ void intel_display_print_error_state(struct drm_i915_error_state_buf *e, void intel_modeset_init_hw(struct drm_i915_private *i915); int intel_modeset_init(struct drm_i915_private *i915); void intel_modeset_driver_remove(struct drm_i915_private *i915); -int intel_modeset_vga_set_state(struct drm_i915_private *dev_priv, bool state); void intel_display_resume(struct drm_device *dev); -void i915_redisable_vga(struct drm_i915_private *dev_priv); -void i915_redisable_vga_power_on(struct drm_i915_private *dev_priv); void intel_init_pch_refclk(struct drm_i915_private *dev_priv); /* modesetting asserts */ diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index f1186bc23542f..bb642a1a0dd47 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -3,8 +3,6 @@ * Copyright © 2019 Intel Corporation */ -#include - #include "display/intel_crt.h" #include "display/intel_dp.h" @@ -19,6 +17,7 @@ #include "intel_hotplug.h" #include "intel_sideband.h" #include "intel_tc.h" +#include "intel_vga.h" bool intel_display_power_well_is_enabled(struct drm_i915_private *dev_priv, enum i915_power_well_id power_well_id); @@ -267,23 +266,8 @@ bool intel_display_power_is_enabled(struct drm_i915_private *dev_priv, static void hsw_power_well_post_enable(struct drm_i915_private *dev_priv, u8 irq_pipe_mask, bool has_vga) { - struct pci_dev *pdev = dev_priv->drm.pdev; - - /* - * After we re-enable the power well, if we touch VGA register 0x3d5 - * we'll get unclaimed register interrupts. This stops after we write - * anything to the VGA MSR register. The vgacon module uses this - * register all the time, so if we unbind our driver and, as a - * consequence, bind vgacon, we'll get stuck in an infinite loop at - * console_unlock(). So make here we touch the VGA MSR register, making - * sure vgacon can keep working normally without triggering interrupts - * and error messages. - */ - if (has_vga) { - vga_get_uninterruptible(pdev, VGA_RSRC_LEGACY_IO); - outb(inb(VGA_MSR_READ), VGA_MSR_WRITE); - vga_put(pdev, VGA_RSRC_LEGACY_IO); - } + if (has_vga) + intel_vga_msr_write(dev_priv); if (irq_pipe_mask) gen8_irq_power_well_post_enable(dev_priv, irq_pipe_mask); @@ -1205,7 +1189,7 @@ static void vlv_display_power_well_init(struct drm_i915_private *dev_priv) intel_crt_reset(&encoder->base); } - i915_redisable_vga_power_on(dev_priv); + intel_vga_redisable_power_on(dev_priv); intel_pps_unlock_regs_wa(dev_priv); } diff --git a/drivers/gpu/drm/i915/display/intel_vga.c b/drivers/gpu/drm/i915/display/intel_vga.c new file mode 100644 index 0000000000000..732568eaa988c --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_vga.c @@ -0,0 +1,160 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include +#include + +#include + +#include "i915_drv.h" +#include "intel_vga.h" + +static i915_reg_t intel_vga_cntrl_reg(struct drm_i915_private *i915) +{ + if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) + return VLV_VGACNTRL; + else if (INTEL_GEN(i915) >= 5) + return CPU_VGACNTRL; + else + return VGACNTRL; +} + +/* Disable the VGA plane that we never use */ +void intel_vga_disable(struct drm_i915_private *dev_priv) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + i915_reg_t vga_reg = intel_vga_cntrl_reg(dev_priv); + u8 sr1; + + /* WaEnableVGAAccessThroughIOPort:ctg,elk,ilk,snb,ivb,vlv,hsw */ + vga_get_uninterruptible(pdev, VGA_RSRC_LEGACY_IO); + outb(SR01, VGA_SR_INDEX); + sr1 = inb(VGA_SR_DATA); + outb(sr1 | 1 << 5, VGA_SR_DATA); + vga_put(pdev, VGA_RSRC_LEGACY_IO); + udelay(300); + + I915_WRITE(vga_reg, VGA_DISP_DISABLE); + POSTING_READ(vga_reg); +} + +void intel_vga_redisable_power_on(struct drm_i915_private *dev_priv) +{ + i915_reg_t vga_reg = intel_vga_cntrl_reg(dev_priv); + + if (!(I915_READ(vga_reg) & VGA_DISP_DISABLE)) { + DRM_DEBUG_KMS("Something enabled VGA plane, disabling it\n"); + intel_vga_disable(dev_priv); + } +} + +void intel_vga_redisable(struct drm_i915_private *i915) +{ + intel_wakeref_t wakeref; + + /* + * This function can be called both from intel_modeset_setup_hw_state or + * at a very early point in our resume sequence, where the power well + * structures are not yet restored. Since this function is at a very + * paranoid "someone might have enabled VGA while we were not looking" + * level, just check if the power well is enabled instead of trying to + * follow the "don't touch the power well if we don't need it" policy + * the rest of the driver uses. + */ + wakeref = intel_display_power_get_if_enabled(i915, POWER_DOMAIN_VGA); + if (!wakeref) + return; + + intel_vga_redisable_power_on(i915); + + intel_display_power_put(i915, POWER_DOMAIN_VGA, wakeref); +} + +void intel_vga_msr_write(struct drm_i915_private *i915) +{ + struct pci_dev *pdev = i915->drm.pdev; + + /* + * After we re-enable the power well, if we touch VGA register 0x3d5 + * we'll get unclaimed register interrupts. This stops after we write + * anything to the VGA MSR register. The vgacon module uses this + * register all the time, so if we unbind our driver and, as a + * consequence, bind vgacon, we'll get stuck in an infinite loop at + * console_unlock(). So make here we touch the VGA MSR register, making + * sure vgacon can keep working normally without triggering interrupts + * and error messages. + */ + vga_get_uninterruptible(pdev, VGA_RSRC_LEGACY_IO); + outb(inb(VGA_MSR_READ), VGA_MSR_WRITE); + vga_put(pdev, VGA_RSRC_LEGACY_IO); +} + +static int +intel_vga_set_state(struct drm_i915_private *i915, bool enable_decode) +{ + unsigned int reg = INTEL_GEN(i915) >= 6 ? SNB_GMCH_CTRL : INTEL_GMCH_CTRL; + u16 gmch_ctrl; + + if (pci_read_config_word(i915->bridge_dev, reg, &gmch_ctrl)) { + DRM_ERROR("failed to read control word\n"); + return -EIO; + } + + if (!!(gmch_ctrl & INTEL_GMCH_VGA_DISABLE) == !enable_decode) + return 0; + + if (enable_decode) + gmch_ctrl &= ~INTEL_GMCH_VGA_DISABLE; + else + gmch_ctrl |= INTEL_GMCH_VGA_DISABLE; + + if (pci_write_config_word(i915->bridge_dev, reg, gmch_ctrl)) { + DRM_ERROR("failed to write control word\n"); + return -EIO; + } + + return 0; +} + +static unsigned int +intel_vga_set_decode(void *cookie, bool enable_decode) +{ + struct drm_i915_private *i915 = cookie; + + intel_vga_set_state(i915, enable_decode); + + if (enable_decode) + return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM | + VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM; + else + return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM; +} + +int intel_vga_register(struct drm_i915_private *i915) +{ + struct pci_dev *pdev = i915->drm.pdev; + int ret; + + /* + * If we have > 1 VGA cards, then we need to arbitrate access to the + * common VGA resources. + * + * If we are a secondary display controller (!PCI_DISPLAY_CLASS_VGA), + * then we do not take part in VGA arbitration and the + * vga_client_register() fails with -ENODEV. + */ + ret = vga_client_register(pdev, i915, NULL, intel_vga_set_decode); + if (ret && ret != -ENODEV) + return ret; + + return 0; +} + +void intel_vga_unregister(struct drm_i915_private *i915) +{ + struct pci_dev *pdev = i915->drm.pdev; + + vga_client_register(pdev, NULL, NULL, NULL); +} diff --git a/drivers/gpu/drm/i915/display/intel_vga.h b/drivers/gpu/drm/i915/display/intel_vga.h new file mode 100644 index 0000000000000..3517872e62ac4 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_vga.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_VGA_H__ +#define __INTEL_VGA_H__ + +struct drm_i915_private; + +void intel_vga_msr_write(struct drm_i915_private *i915); +void intel_vga_disable(struct drm_i915_private *i915); +void intel_vga_redisable(struct drm_i915_private *i915); +void intel_vga_redisable_power_on(struct drm_i915_private *i915); +int intel_vga_register(struct drm_i915_private *i915); +void intel_vga_unregister(struct drm_i915_private *i915); + +#endif /* __INTEL_VGA_H__ */ diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 364f7d0b7e319..1d2246c59cdea 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include #include @@ -59,6 +58,7 @@ #include "display/intel_overlay.h" #include "display/intel_pipe_crc.h" #include "display/intel_sprite.h" +#include "display/intel_vga.h" #include "gem/i915_gem_context.h" #include "gem/i915_gem_ioctls.h" @@ -269,19 +269,6 @@ intel_teardown_mchbar(struct drm_i915_private *dev_priv) release_resource(&dev_priv->mch_res); } -/* true = enable decode, false = disable decoder */ -static unsigned int i915_vga_set_decode(void *cookie, bool state) -{ - struct drm_i915_private *dev_priv = cookie; - - intel_modeset_vga_set_state(dev_priv, state); - if (state) - return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM | - VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM; - else - return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM; -} - static int i915_resume_switcheroo(struct drm_i915_private *i915); static int i915_suspend_switcheroo(struct drm_i915_private *i915, pm_message_t state); @@ -346,15 +333,8 @@ static int i915_driver_modeset_probe(struct drm_i915_private *i915) intel_bios_init(i915); - /* If we have > 1 VGA cards, then we need to arbitrate access - * to the common VGA resources. - * - * If we are a secondary display controller (!PCI_DISPLAY_CLASS_VGA), - * then we do not take part in VGA arbitration and the - * vga_client_register() fails with -ENODEV. - */ - ret = vga_client_register(pdev, i915, NULL, i915_vga_set_decode); - if (ret && ret != -ENODEV) + ret = intel_vga_register(i915); + if (ret) goto out; intel_register_dsm_handler(); @@ -416,7 +396,7 @@ static int i915_driver_modeset_probe(struct drm_i915_private *i915) intel_power_domains_driver_remove(i915); vga_switcheroo_unregister_client(pdev); cleanup_vga_client: - vga_client_register(pdev, NULL, NULL, NULL); + intel_vga_unregister(i915); out: return ret; } @@ -455,7 +435,7 @@ static void i915_driver_modeset_remove(struct drm_i915_private *i915) intel_bios_driver_remove(i915); vga_switcheroo_unregister_client(pdev); - vga_client_register(pdev, NULL, NULL, NULL); + intel_vga_unregister(i915); intel_csr_ucode_fini(i915); } diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index ea53dfe2fba04..1cbf3998b3616 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -23,7 +23,6 @@ */ #include -#include #include #include diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c index 8508a01ad8b91..2b2086def0f11 100644 --- a/drivers/gpu/drm/i915/i915_suspend.c +++ b/drivers/gpu/drm/i915/i915_suspend.c @@ -28,6 +28,7 @@ #include "display/intel_fbc.h" #include "display/intel_gmbus.h" +#include "display/intel_vga.h" #include "i915_drv.h" #include "i915_reg.h" @@ -57,7 +58,7 @@ static void i915_restore_display(struct drm_i915_private *dev_priv) if (HAS_FBC(dev_priv) && INTEL_GEN(dev_priv) <= 4 && !IS_G4X(dev_priv)) I915_WRITE(FBC_CONTROL, dev_priv->regfile.saveFBC_CONTROL); - i915_redisable_vga(dev_priv); + intel_vga_redisable(dev_priv); } int i915_save_state(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 2fd3c097e1f59..ad719c9602af9 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -27,7 +27,6 @@ */ #include -#include #include From 74f1d78965ee16a2617d245fd1f7e40964c0d562 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 1 Oct 2019 18:46:28 +0300 Subject: [PATCH 288/331] drm/i915: Limit MST modes based on plane size too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When adding the max plane size checks to the .mode_valid() hooks I naturally forgot about MST. Take care of that one as well. Cc: Manasi Navare Cc: Sean Paul Cc: José Roberto de Souza Cc: Maarten Lankhorst Fixes: 2d20411e25a3 ("drm/i915: Don't advertise modes that exceed the max plane size") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191001154629.11063-1-ville.syrjala@linux.intel.com Reviewed-by: José Roberto de Souza --- drivers/gpu/drm/i915/display/intel_dp_mst.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index df4f35c10a693..2203be28ea016 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -424,6 +424,7 @@ static enum drm_mode_status intel_dp_mst_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { + struct drm_i915_private *dev_priv = to_i915(connector->dev); struct intel_connector *intel_connector = to_intel_connector(connector); struct intel_dp *intel_dp = intel_connector->mst_port; int max_dotclk = to_i915(connector->dev)->max_dotclk_freq; @@ -451,7 +452,7 @@ intel_dp_mst_mode_valid(struct drm_connector *connector, if (mode_rate > max_rate || mode->clock > max_dotclk) return MODE_CLOCK_HIGH; - return MODE_OK; + return intel_mode_valid_max_plane_size(dev_priv, mode); } static struct drm_encoder *intel_mst_atomic_best_encoder(struct drm_connector *connector, From 15de0889b0c905a64d6f0fbc765a25d7899a560b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 1 Oct 2019 18:46:29 +0300 Subject: [PATCH 289/331] drm/i915: Polish intel_tv_mode_valid() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop the tv_mode NULL check since intel_tv_mode_find() never actually returns NULL, and flip the condition around so that the MODE_OK case is at the end, which is customary to all the other .mode_valid() implementations. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191001154629.11063-2-ville.syrjala@linux.intel.com Reviewed-by: José Roberto de Souza --- drivers/gpu/drm/i915/display/intel_tv.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c index b70221f5112a7..71c3f7e5df7d2 100644 --- a/drivers/gpu/drm/i915/display/intel_tv.c +++ b/drivers/gpu/drm/i915/display/intel_tv.c @@ -961,11 +961,10 @@ intel_tv_mode_valid(struct drm_connector *connector, return MODE_CLOCK_HIGH; /* Ensure TV refresh is close to desired refresh */ - if (tv_mode && abs(tv_mode->refresh - drm_mode_vrefresh(mode) * 1000) - < 1000) - return MODE_OK; + if (abs(tv_mode->refresh - drm_mode_vrefresh(mode) * 1000) >= 1000) + return MODE_CLOCK_RANGE; - return MODE_CLOCK_RANGE; + return MODE_OK; } static int From 006e570128f413759b9df64b51bae79903679c9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 30 Sep 2019 21:30:45 +0300 Subject: [PATCH 290/331] drm/i915: Fix g4x sprite scaling stride check with GTT remapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I forgot to update the g4x sprite scaling stride check when GTT remapping was introduced. The stride of the original framebuffer is irrelevant when remapping is used and instead we want to check the stride of the remapped view. Also drop the duplicate width_bytes check. We already check that a few lines earlier. Fixes: df79cf441910 ("drm/i915: Store the final plane stride in plane_state") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190930183045.662-1-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/display/intel_sprite.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index 3d56db32291b6..633fa80693481 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -1516,6 +1516,7 @@ g4x_sprite_check_scaling(struct intel_crtc_state *crtc_state, int src_x, src_w, src_h, crtc_w, crtc_h; const struct drm_display_mode *adjusted_mode = &crtc_state->base.adjusted_mode; + unsigned int stride = plane_state->color_plane[0].stride; unsigned int cpp = fb->format->cpp[0]; unsigned int width_bytes; int min_width, min_height; @@ -1557,9 +1558,9 @@ g4x_sprite_check_scaling(struct intel_crtc_state *crtc_state, return -EINVAL; } - if (width_bytes > 4096 || fb->pitches[0] > 4096) { + if (stride > 4096) { DRM_DEBUG_KMS("Stride (%u) exceeds hardware max with scaling (%u)\n", - fb->pitches[0], 4096); + stride, 4096); return -EINVAL; } From dfe324f34c53af095bfe54b322e1a338421a6b0e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 2 Oct 2019 13:24:30 +0100 Subject: [PATCH 291/331] drm/i915/selftests: Extract random_offset() for use with a prng For selftests, we desire repeatability and so prefer using a prng with known seed over true randomness. Extract random_offset() as a selftest utility that can take the prng state. Suggested-by: Matthew Auld Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20191002122430.23205-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 14 ++++++++----- drivers/gpu/drm/i915/selftests/i915_random.c | 20 +++++++++++++++++++ drivers/gpu/drm/i915/selftests/i915_random.h | 4 ++++ 3 files changed, 33 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 0d40e0b429230..f901bbb9586e0 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -1299,6 +1299,7 @@ static int igt_gtt_reserve(void *arg) { struct i915_ggtt *ggtt = arg; struct drm_i915_gem_object *obj, *on; + I915_RND_STATE(prng); LIST_HEAD(objects); u64 total; int err = -ENODEV; @@ -1425,9 +1426,10 @@ static int igt_gtt_reserve(void *arg) goto out; } - offset = random_offset(0, ggtt->vm.total, - 2*I915_GTT_PAGE_SIZE, - I915_GTT_MIN_ALIGNMENT); + offset = igt_random_offset(&prng, + 0, ggtt->vm.total, + 2 * I915_GTT_PAGE_SIZE, + I915_GTT_MIN_ALIGNMENT); err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node, obj->base.size, @@ -1772,6 +1774,7 @@ static int igt_cs_tlb(void *arg) struct intel_context *ce; struct drm_file *file; struct i915_vma *vma; + I915_RND_STATE(prng); unsigned int i; u32 *result; u32 *batch; @@ -1885,8 +1888,9 @@ static int igt_cs_tlb(void *arg) struct i915_request *rq; u64 offset; - offset = random_offset(0, vm->total - PAGE_SIZE, - chunk_size, PAGE_SIZE); + offset = igt_random_offset(&prng, + 0, vm->total - PAGE_SIZE, + chunk_size, PAGE_SIZE); err = vm->allocate_va_range(vm, offset, chunk_size); if (err) diff --git a/drivers/gpu/drm/i915/selftests/i915_random.c b/drivers/gpu/drm/i915/selftests/i915_random.c index 716a3f19f0303..abdfadcf626bd 100644 --- a/drivers/gpu/drm/i915/selftests/i915_random.c +++ b/drivers/gpu/drm/i915/selftests/i915_random.c @@ -29,6 +29,7 @@ #include #include "i915_random.h" +#include "i915_utils.h" u64 i915_prandom_u64_state(struct rnd_state *rnd) { @@ -87,3 +88,22 @@ unsigned int *i915_random_order(unsigned int count, struct rnd_state *state) i915_random_reorder(order, count, state); return order; } + +u64 igt_random_offset(struct rnd_state *state, + u64 start, u64 end, + u64 len, u64 align) +{ + u64 range, addr; + + BUG_ON(range_overflows(start, len, end)); + BUG_ON(round_up(start, align) > round_down(end - len, align)); + + range = round_down(end - len, align) - round_up(start, align); + if (range) { + addr = i915_prandom_u64_state(state); + div64_u64_rem(addr, range, &addr); + start += addr; + } + + return round_up(start, align); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_random.h b/drivers/gpu/drm/i915/selftests/i915_random.h index 8e1ff9c105b66..35cc69a3a1b99 100644 --- a/drivers/gpu/drm/i915/selftests/i915_random.h +++ b/drivers/gpu/drm/i915/selftests/i915_random.h @@ -57,4 +57,8 @@ void i915_random_reorder(unsigned int *order, void i915_prandom_shuffle(void *arr, size_t elsz, size_t count, struct rnd_state *state); +u64 igt_random_offset(struct rnd_state *state, + u64 start, u64 end, + u64 len, u64 align); + #endif /* !__I915_SELFTESTS_RANDOM_H__ */ From 3cbad5d7774953d36c09cca295e6b84e61b03ae7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 2 Oct 2019 13:30:14 +0100 Subject: [PATCH 292/331] drm/i915/gem: Refactor tests on obj->ops->flags We repeat obj->ops->flags in our object checks, so pull that into its own little helper for clarity. Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20191002123014.1545-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_object.h | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index aec05f967d9d7..53c7069ba3e89 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -134,34 +134,41 @@ i915_gem_object_is_readonly(const struct drm_i915_gem_object *obj) return obj->base.vma_node.readonly; } +static inline bool +i915_gem_object_type_has(const struct drm_i915_gem_object *obj, + unsigned long flags) +{ + return obj->ops->flags & flags; +} + static inline bool i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) { - return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE; + return i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_STRUCT_PAGE); } static inline bool i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj) { - return obj->ops->flags & I915_GEM_OBJECT_IS_SHRINKABLE; + return i915_gem_object_type_has(obj, I915_GEM_OBJECT_IS_SHRINKABLE); } static inline bool i915_gem_object_is_proxy(const struct drm_i915_gem_object *obj) { - return obj->ops->flags & I915_GEM_OBJECT_IS_PROXY; + return i915_gem_object_type_has(obj, I915_GEM_OBJECT_IS_PROXY); } static inline bool i915_gem_object_never_bind_ggtt(const struct drm_i915_gem_object *obj) { - return obj->ops->flags & I915_GEM_OBJECT_NO_GGTT; + return i915_gem_object_type_has(obj, I915_GEM_OBJECT_NO_GGTT); } static inline bool i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj) { - return obj->ops->flags & I915_GEM_OBJECT_ASYNC_CANCEL; + return i915_gem_object_type_has(obj, I915_GEM_OBJECT_ASYNC_CANCEL); } static inline bool From ed500bf61279c26bcaa5a9979543bc45883efddf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 8 Jul 2019 19:20:42 +0300 Subject: [PATCH 293/331] drm/i915: Populate possible_crtcs correctly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't advertize non-exisiting crtcs in the encoder possible_crtcs bitmask. Reviewed-by: Dhinakaran Pandiyan Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190708162048.4286-9-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index d99c59e97568e..20ec8a1dc5e16 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -15257,6 +15257,20 @@ static int intel_encoder_clones(struct intel_encoder *encoder) return index_mask; } +static u32 intel_encoder_possible_crtcs(struct intel_encoder *encoder) +{ + struct drm_device *dev = encoder->base.dev; + struct intel_crtc *crtc; + u32 possible_crtcs = 0; + + for_each_intel_crtc(dev, crtc) { + if (encoder->crtc_mask & BIT(crtc->pipe)) + possible_crtcs |= drm_crtc_mask(&crtc->base); + } + + return possible_crtcs; +} + static bool ilk_has_edp_a(struct drm_i915_private *dev_priv) { if (!IS_MOBILE(dev_priv)) @@ -15557,7 +15571,8 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) intel_psr_init(dev_priv); for_each_intel_encoder(&dev_priv->drm, encoder) { - encoder->base.possible_crtcs = encoder->crtc_mask; + encoder->base.possible_crtcs = + intel_encoder_possible_crtcs(encoder); encoder->base.possible_clones = intel_encoder_clones(encoder); } From 0fbae9d2db851e18ec413da2eba9d875269660e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 8 Jul 2019 19:20:44 +0300 Subject: [PATCH 294/331] drm/i915: Clean up encoder->crtc_mask setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use BIT(pipe) for better legibility when populating the crtc_mask for encoders. Also remove the redundant possible_crtcs setup for the TV encoder. Reviewed-by: Jani Nikula Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190708162048.4286-11-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_crt.c | 4 ++-- drivers/gpu/drm/i915/display/intel_dp.c | 6 +++--- drivers/gpu/drm/i915/display/intel_dvo.c | 2 +- drivers/gpu/drm/i915/display/intel_hdmi.c | 6 +++--- drivers/gpu/drm/i915/display/intel_lvds.c | 6 +++--- drivers/gpu/drm/i915/display/intel_sdvo.c | 2 +- drivers/gpu/drm/i915/display/intel_tv.c | 3 +-- 7 files changed, 14 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c index e6e8d4a820446..ff6126ea793ce 100644 --- a/drivers/gpu/drm/i915/display/intel_crt.c +++ b/drivers/gpu/drm/i915/display/intel_crt.c @@ -994,9 +994,9 @@ void intel_crt_init(struct drm_i915_private *dev_priv) crt->base.type = INTEL_OUTPUT_ANALOG; crt->base.cloneable = (1 << INTEL_OUTPUT_DVO) | (1 << INTEL_OUTPUT_HDMI); if (IS_I830(dev_priv)) - crt->base.crtc_mask = (1 << 0); + crt->base.crtc_mask = BIT(PIPE_A); else - crt->base.crtc_mask = (1 << 0) | (1 << 1) | (1 << 2); + crt->base.crtc_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); if (IS_GEN(dev_priv, 2)) connector->interlace_allowed = 0; diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 14a5c661f7103..b788e332e76a7 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -7366,11 +7366,11 @@ bool intel_dp_init(struct drm_i915_private *dev_priv, intel_encoder->power_domain = intel_port_to_power_domain(port); if (IS_CHERRYVIEW(dev_priv)) { if (port == PORT_D) - intel_encoder->crtc_mask = 1 << 2; + intel_encoder->crtc_mask = BIT(PIPE_C); else - intel_encoder->crtc_mask = (1 << 0) | (1 << 1); + intel_encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B); } else { - intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2); + intel_encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); } intel_encoder->cloneable = 0; intel_encoder->port = port; diff --git a/drivers/gpu/drm/i915/display/intel_dvo.c b/drivers/gpu/drm/i915/display/intel_dvo.c index 34193d04597a8..9827f99491d18 100644 --- a/drivers/gpu/drm/i915/display/intel_dvo.c +++ b/drivers/gpu/drm/i915/display/intel_dvo.c @@ -505,7 +505,7 @@ void intel_dvo_init(struct drm_i915_private *dev_priv) intel_encoder->type = INTEL_OUTPUT_DVO; intel_encoder->power_domain = POWER_DOMAIN_PORT_OTHER; intel_encoder->port = port; - intel_encoder->crtc_mask = (1 << 0) | (1 << 1); + intel_encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B); switch (dvo->type) { case INTEL_DVO_CHIP_TMDS: diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index 5ab73331d35df..03b140980fc45 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -3269,11 +3269,11 @@ void intel_hdmi_init(struct drm_i915_private *dev_priv, intel_encoder->port = port; if (IS_CHERRYVIEW(dev_priv)) { if (port == PORT_D) - intel_encoder->crtc_mask = 1 << 2; + intel_encoder->crtc_mask = BIT(PIPE_C); else - intel_encoder->crtc_mask = (1 << 0) | (1 << 1); + intel_encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B); } else { - intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2); + intel_encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); } intel_encoder->cloneable = 1 << INTEL_OUTPUT_ANALOG; /* diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c index c786abdc3336c..13841d7c455b3 100644 --- a/drivers/gpu/drm/i915/display/intel_lvds.c +++ b/drivers/gpu/drm/i915/display/intel_lvds.c @@ -900,11 +900,11 @@ void intel_lvds_init(struct drm_i915_private *dev_priv) intel_encoder->port = PORT_NONE; intel_encoder->cloneable = 0; if (HAS_PCH_SPLIT(dev_priv)) - intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2); + intel_encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); else if (IS_GEN(dev_priv, 4)) - intel_encoder->crtc_mask = (1 << 0) | (1 << 1); + intel_encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B); else - intel_encoder->crtc_mask = (1 << 1); + intel_encoder->crtc_mask = BIT(PIPE_B); drm_connector_helper_add(connector, &intel_lvds_connector_helper_funcs); connector->display_info.subpixel_order = SubPixelHorizontalRGB; diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index adeb1c8409768..47f5d87a938a9 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -2921,7 +2921,7 @@ intel_sdvo_output_setup(struct intel_sdvo *intel_sdvo, u16 flags) bytes[0], bytes[1]); return false; } - intel_sdvo->base.crtc_mask = (1 << 0) | (1 << 1) | (1 << 2); + intel_sdvo->base.crtc_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); return true; } diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c index 71c3f7e5df7d2..70726b4812444 100644 --- a/drivers/gpu/drm/i915/display/intel_tv.c +++ b/drivers/gpu/drm/i915/display/intel_tv.c @@ -1947,9 +1947,8 @@ intel_tv_init(struct drm_i915_private *dev_priv) intel_encoder->type = INTEL_OUTPUT_TVOUT; intel_encoder->power_domain = POWER_DOMAIN_PORT_OTHER; intel_encoder->port = PORT_NONE; - intel_encoder->crtc_mask = (1 << 0) | (1 << 1); + intel_encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B); intel_encoder->cloneable = 0; - intel_encoder->base.possible_crtcs = ((1 << 0) | (1 << 1)); intel_tv->type = DRM_MODE_CONNECTOR_Unknown; /* BIOS margin values */ From f21e8b80b79d77872e88d853032294ba5bde0bc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Tue, 1 Oct 2019 12:37:29 -0700 Subject: [PATCH 295/331] drm/i915/mg: Use tc_port instead of port parameter to MG registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All the MG registers is based on the tc_port not port, so MG_PHY_PORT_LN() was subtracting port and PORT_C what is very fragile. So replacing port to tc_port in all MG register macros and users like we have for DKL. Cc: Lucas De Marchi Cc: Imre Deak Reviewed-by: Lucas De Marchi Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20191001193729.123736-1-jose.souza@intel.com --- drivers/gpu/drm/i915/display/intel_ddi.c | 64 +++++++-------- drivers/gpu/drm/i915/i915_reg.h | 100 +++++++++++------------ 2 files changed, 81 insertions(+), 83 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index b463e51f8b459..3c1e885e01877 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -2681,7 +2681,7 @@ static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder, u32 level) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum port port = encoder->port; + enum tc_port tc_port = intel_port_to_tc(dev_priv, encoder->port); const struct icl_mg_phy_ddi_buf_trans *ddi_translations; u32 n_entries, val; int ln; @@ -2697,33 +2697,33 @@ static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder, /* Set MG_TX_LINK_PARAMS cri_use_fs32 to 0. */ for (ln = 0; ln < 2; ln++) { - val = I915_READ(MG_TX1_LINK_PARAMS(ln, port)); + val = I915_READ(MG_TX1_LINK_PARAMS(ln, tc_port)); val &= ~CRI_USE_FS32; - I915_WRITE(MG_TX1_LINK_PARAMS(ln, port), val); + I915_WRITE(MG_TX1_LINK_PARAMS(ln, tc_port), val); - val = I915_READ(MG_TX2_LINK_PARAMS(ln, port)); + val = I915_READ(MG_TX2_LINK_PARAMS(ln, tc_port)); val &= ~CRI_USE_FS32; - I915_WRITE(MG_TX2_LINK_PARAMS(ln, port), val); + I915_WRITE(MG_TX2_LINK_PARAMS(ln, tc_port), val); } /* Program MG_TX_SWINGCTRL with values from vswing table */ for (ln = 0; ln < 2; ln++) { - val = I915_READ(MG_TX1_SWINGCTRL(ln, port)); + val = I915_READ(MG_TX1_SWINGCTRL(ln, tc_port)); val &= ~CRI_TXDEEMPH_OVERRIDE_17_12_MASK; val |= CRI_TXDEEMPH_OVERRIDE_17_12( ddi_translations[level].cri_txdeemph_override_17_12); - I915_WRITE(MG_TX1_SWINGCTRL(ln, port), val); + I915_WRITE(MG_TX1_SWINGCTRL(ln, tc_port), val); - val = I915_READ(MG_TX2_SWINGCTRL(ln, port)); + val = I915_READ(MG_TX2_SWINGCTRL(ln, tc_port)); val &= ~CRI_TXDEEMPH_OVERRIDE_17_12_MASK; val |= CRI_TXDEEMPH_OVERRIDE_17_12( ddi_translations[level].cri_txdeemph_override_17_12); - I915_WRITE(MG_TX2_SWINGCTRL(ln, port), val); + I915_WRITE(MG_TX2_SWINGCTRL(ln, tc_port), val); } /* Program MG_TX_DRVCTRL with values from vswing table */ for (ln = 0; ln < 2; ln++) { - val = I915_READ(MG_TX1_DRVCTRL(ln, port)); + val = I915_READ(MG_TX1_DRVCTRL(ln, tc_port)); val &= ~(CRI_TXDEEMPH_OVERRIDE_11_6_MASK | CRI_TXDEEMPH_OVERRIDE_5_0_MASK); val |= CRI_TXDEEMPH_OVERRIDE_5_0( @@ -2731,9 +2731,9 @@ static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder, CRI_TXDEEMPH_OVERRIDE_11_6( ddi_translations[level].cri_txdeemph_override_11_6) | CRI_TXDEEMPH_OVERRIDE_EN; - I915_WRITE(MG_TX1_DRVCTRL(ln, port), val); + I915_WRITE(MG_TX1_DRVCTRL(ln, tc_port), val); - val = I915_READ(MG_TX2_DRVCTRL(ln, port)); + val = I915_READ(MG_TX2_DRVCTRL(ln, tc_port)); val &= ~(CRI_TXDEEMPH_OVERRIDE_11_6_MASK | CRI_TXDEEMPH_OVERRIDE_5_0_MASK); val |= CRI_TXDEEMPH_OVERRIDE_5_0( @@ -2741,7 +2741,7 @@ static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder, CRI_TXDEEMPH_OVERRIDE_11_6( ddi_translations[level].cri_txdeemph_override_11_6) | CRI_TXDEEMPH_OVERRIDE_EN; - I915_WRITE(MG_TX2_DRVCTRL(ln, port), val); + I915_WRITE(MG_TX2_DRVCTRL(ln, tc_port), val); /* FIXME: Program CRI_LOADGEN_SEL after the spec is updated */ } @@ -2752,17 +2752,17 @@ static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder, * values from table for which TX1 and TX2 enabled. */ for (ln = 0; ln < 2; ln++) { - val = I915_READ(MG_CLKHUB(ln, port)); + val = I915_READ(MG_CLKHUB(ln, tc_port)); if (link_clock < 300000) val |= CFG_LOW_RATE_LKREN_EN; else val &= ~CFG_LOW_RATE_LKREN_EN; - I915_WRITE(MG_CLKHUB(ln, port), val); + I915_WRITE(MG_CLKHUB(ln, tc_port), val); } /* Program the MG_TX_DCC based on the link frequency */ for (ln = 0; ln < 2; ln++) { - val = I915_READ(MG_TX1_DCC(ln, port)); + val = I915_READ(MG_TX1_DCC(ln, tc_port)); val &= ~CFG_AMI_CK_DIV_OVERRIDE_VAL_MASK; if (link_clock <= 500000) { val &= ~CFG_AMI_CK_DIV_OVERRIDE_EN; @@ -2770,9 +2770,9 @@ static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder, val |= CFG_AMI_CK_DIV_OVERRIDE_EN | CFG_AMI_CK_DIV_OVERRIDE_VAL(1); } - I915_WRITE(MG_TX1_DCC(ln, port), val); + I915_WRITE(MG_TX1_DCC(ln, tc_port), val); - val = I915_READ(MG_TX2_DCC(ln, port)); + val = I915_READ(MG_TX2_DCC(ln, tc_port)); val &= ~CFG_AMI_CK_DIV_OVERRIDE_VAL_MASK; if (link_clock <= 500000) { val &= ~CFG_AMI_CK_DIV_OVERRIDE_EN; @@ -2780,18 +2780,18 @@ static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder, val |= CFG_AMI_CK_DIV_OVERRIDE_EN | CFG_AMI_CK_DIV_OVERRIDE_VAL(1); } - I915_WRITE(MG_TX2_DCC(ln, port), val); + I915_WRITE(MG_TX2_DCC(ln, tc_port), val); } /* Program MG_TX_PISO_READLOAD with values from vswing table */ for (ln = 0; ln < 2; ln++) { - val = I915_READ(MG_TX1_PISO_READLOAD(ln, port)); + val = I915_READ(MG_TX1_PISO_READLOAD(ln, tc_port)); val |= CRI_CALCINIT; - I915_WRITE(MG_TX1_PISO_READLOAD(ln, port), val); + I915_WRITE(MG_TX1_PISO_READLOAD(ln, tc_port), val); - val = I915_READ(MG_TX2_PISO_READLOAD(ln, port)); + val = I915_READ(MG_TX2_PISO_READLOAD(ln, tc_port)); val |= CRI_CALCINIT; - I915_WRITE(MG_TX2_PISO_READLOAD(ln, port), val); + I915_WRITE(MG_TX2_PISO_READLOAD(ln, tc_port), val); } } @@ -3150,8 +3150,7 @@ static void icl_phy_set_clock_gating(struct intel_digital_port *dig_port, bool enable) { struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); - enum port port = dig_port->base.port; - enum tc_port tc_port = intel_port_to_tc(dev_priv, port); + enum tc_port tc_port = intel_port_to_tc(dev_priv, dig_port->base.port); u32 val, bits; int ln; @@ -3167,7 +3166,7 @@ icl_phy_set_clock_gating(struct intel_digital_port *dig_port, bool enable) I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, ln)); val = I915_READ(DKL_DP_MODE(tc_port)); } else { - val = I915_READ(MG_DP_MODE(ln, port)); + val = I915_READ(MG_DP_MODE(ln, tc_port)); } if (enable) @@ -3178,7 +3177,7 @@ icl_phy_set_clock_gating(struct intel_digital_port *dig_port, bool enable) if (INTEL_GEN(dev_priv) >= 12) I915_WRITE(DKL_DP_MODE(tc_port), val); else - I915_WRITE(MG_DP_MODE(ln, port), val); + I915_WRITE(MG_DP_MODE(ln, tc_port), val); } if (INTEL_GEN(dev_priv) == 11) { @@ -3203,8 +3202,7 @@ icl_program_mg_dp_mode(struct intel_digital_port *intel_dig_port, const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); - enum port port = intel_dig_port->base.port; - enum tc_port tc_port = intel_port_to_tc(dev_priv, port); + enum tc_port tc_port = intel_port_to_tc(dev_priv, intel_dig_port->base.port); u32 ln0, ln1, pin_assignment; u8 width; @@ -3217,8 +3215,8 @@ icl_program_mg_dp_mode(struct intel_digital_port *intel_dig_port, I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, 0x1)); ln1 = I915_READ(DKL_DP_MODE(tc_port)); } else { - ln0 = I915_READ(MG_DP_MODE(0, port)); - ln1 = I915_READ(MG_DP_MODE(1, port)); + ln0 = I915_READ(MG_DP_MODE(0, tc_port)); + ln1 = I915_READ(MG_DP_MODE(1, tc_port)); } ln0 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X1_MODE); @@ -3280,8 +3278,8 @@ icl_program_mg_dp_mode(struct intel_digital_port *intel_dig_port, I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, 0x1)); I915_WRITE(DKL_DP_MODE(tc_port), ln1); } else { - I915_WRITE(MG_DP_MODE(0, port), ln0); - I915_WRITE(MG_DP_MODE(1, port), ln1); + I915_WRITE(MG_DP_MODE(0, tc_port), ln0); + I915_WRITE(MG_DP_MODE(1, tc_port), ln1); } } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 058aa5ca8b73f..eefd789b9a282 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1956,8 +1956,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define ICL_DPHY_CHKN(port) _MMIO(_ICL_COMBOPHY(port) + _ICL_DPHY_CHKN_REG) #define ICL_DPHY_CHKN_AFE_OVER_PPI_STRAP REG_BIT(7) -#define MG_PHY_PORT_LN(ln, port, ln0p1, ln0p2, ln1p1) \ - _MMIO(_PORT((port) - PORT_C, ln0p1, ln0p2) + (ln) * ((ln1p1) - (ln0p1))) +#define MG_PHY_PORT_LN(ln, tc_port, ln0p1, ln0p2, ln1p1) \ + _MMIO(_PORT(tc_port, ln0p1, ln0p2) + (ln) * ((ln1p1) - (ln0p1))) #define MG_TX_LINK_PARAMS_TX1LN0_PORT1 0x16812C #define MG_TX_LINK_PARAMS_TX1LN1_PORT1 0x16852C @@ -1967,10 +1967,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MG_TX_LINK_PARAMS_TX1LN1_PORT3 0x16A52C #define MG_TX_LINK_PARAMS_TX1LN0_PORT4 0x16B12C #define MG_TX_LINK_PARAMS_TX1LN1_PORT4 0x16B52C -#define MG_TX1_LINK_PARAMS(ln, port) \ - MG_PHY_PORT_LN(ln, port, MG_TX_LINK_PARAMS_TX1LN0_PORT1, \ - MG_TX_LINK_PARAMS_TX1LN0_PORT2, \ - MG_TX_LINK_PARAMS_TX1LN1_PORT1) +#define MG_TX1_LINK_PARAMS(ln, tc_port) \ + MG_PHY_PORT_LN(ln, tc_port, MG_TX_LINK_PARAMS_TX1LN0_PORT1, \ + MG_TX_LINK_PARAMS_TX1LN0_PORT2, \ + MG_TX_LINK_PARAMS_TX1LN1_PORT1) #define MG_TX_LINK_PARAMS_TX2LN0_PORT1 0x1680AC #define MG_TX_LINK_PARAMS_TX2LN1_PORT1 0x1684AC @@ -1980,10 +1980,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MG_TX_LINK_PARAMS_TX2LN1_PORT3 0x16A4AC #define MG_TX_LINK_PARAMS_TX2LN0_PORT4 0x16B0AC #define MG_TX_LINK_PARAMS_TX2LN1_PORT4 0x16B4AC -#define MG_TX2_LINK_PARAMS(ln, port) \ - MG_PHY_PORT_LN(ln, port, MG_TX_LINK_PARAMS_TX2LN0_PORT1, \ - MG_TX_LINK_PARAMS_TX2LN0_PORT2, \ - MG_TX_LINK_PARAMS_TX2LN1_PORT1) +#define MG_TX2_LINK_PARAMS(ln, tc_port) \ + MG_PHY_PORT_LN(ln, tc_port, MG_TX_LINK_PARAMS_TX2LN0_PORT1, \ + MG_TX_LINK_PARAMS_TX2LN0_PORT2, \ + MG_TX_LINK_PARAMS_TX2LN1_PORT1) #define CRI_USE_FS32 (1 << 5) #define MG_TX_PISO_READLOAD_TX1LN0_PORT1 0x16814C @@ -1994,10 +1994,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MG_TX_PISO_READLOAD_TX1LN1_PORT3 0x16A54C #define MG_TX_PISO_READLOAD_TX1LN0_PORT4 0x16B14C #define MG_TX_PISO_READLOAD_TX1LN1_PORT4 0x16B54C -#define MG_TX1_PISO_READLOAD(ln, port) \ - MG_PHY_PORT_LN(ln, port, MG_TX_PISO_READLOAD_TX1LN0_PORT1, \ - MG_TX_PISO_READLOAD_TX1LN0_PORT2, \ - MG_TX_PISO_READLOAD_TX1LN1_PORT1) +#define MG_TX1_PISO_READLOAD(ln, tc_port) \ + MG_PHY_PORT_LN(ln, tc_port, MG_TX_PISO_READLOAD_TX1LN0_PORT1, \ + MG_TX_PISO_READLOAD_TX1LN0_PORT2, \ + MG_TX_PISO_READLOAD_TX1LN1_PORT1) #define MG_TX_PISO_READLOAD_TX2LN0_PORT1 0x1680CC #define MG_TX_PISO_READLOAD_TX2LN1_PORT1 0x1684CC @@ -2007,10 +2007,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MG_TX_PISO_READLOAD_TX2LN1_PORT3 0x16A4CC #define MG_TX_PISO_READLOAD_TX2LN0_PORT4 0x16B0CC #define MG_TX_PISO_READLOAD_TX2LN1_PORT4 0x16B4CC -#define MG_TX2_PISO_READLOAD(ln, port) \ - MG_PHY_PORT_LN(ln, port, MG_TX_PISO_READLOAD_TX2LN0_PORT1, \ - MG_TX_PISO_READLOAD_TX2LN0_PORT2, \ - MG_TX_PISO_READLOAD_TX2LN1_PORT1) +#define MG_TX2_PISO_READLOAD(ln, tc_port) \ + MG_PHY_PORT_LN(ln, tc_port, MG_TX_PISO_READLOAD_TX2LN0_PORT1, \ + MG_TX_PISO_READLOAD_TX2LN0_PORT2, \ + MG_TX_PISO_READLOAD_TX2LN1_PORT1) #define CRI_CALCINIT (1 << 1) #define MG_TX_SWINGCTRL_TX1LN0_PORT1 0x168148 @@ -2021,10 +2021,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MG_TX_SWINGCTRL_TX1LN1_PORT3 0x16A548 #define MG_TX_SWINGCTRL_TX1LN0_PORT4 0x16B148 #define MG_TX_SWINGCTRL_TX1LN1_PORT4 0x16B548 -#define MG_TX1_SWINGCTRL(ln, port) \ - MG_PHY_PORT_LN(ln, port, MG_TX_SWINGCTRL_TX1LN0_PORT1, \ - MG_TX_SWINGCTRL_TX1LN0_PORT2, \ - MG_TX_SWINGCTRL_TX1LN1_PORT1) +#define MG_TX1_SWINGCTRL(ln, tc_port) \ + MG_PHY_PORT_LN(ln, tc_port, MG_TX_SWINGCTRL_TX1LN0_PORT1, \ + MG_TX_SWINGCTRL_TX1LN0_PORT2, \ + MG_TX_SWINGCTRL_TX1LN1_PORT1) #define MG_TX_SWINGCTRL_TX2LN0_PORT1 0x1680C8 #define MG_TX_SWINGCTRL_TX2LN1_PORT1 0x1684C8 @@ -2034,10 +2034,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MG_TX_SWINGCTRL_TX2LN1_PORT3 0x16A4C8 #define MG_TX_SWINGCTRL_TX2LN0_PORT4 0x16B0C8 #define MG_TX_SWINGCTRL_TX2LN1_PORT4 0x16B4C8 -#define MG_TX2_SWINGCTRL(ln, port) \ - MG_PHY_PORT_LN(ln, port, MG_TX_SWINGCTRL_TX2LN0_PORT1, \ - MG_TX_SWINGCTRL_TX2LN0_PORT2, \ - MG_TX_SWINGCTRL_TX2LN1_PORT1) +#define MG_TX2_SWINGCTRL(ln, tc_port) \ + MG_PHY_PORT_LN(ln, tc_port, MG_TX_SWINGCTRL_TX2LN0_PORT1, \ + MG_TX_SWINGCTRL_TX2LN0_PORT2, \ + MG_TX_SWINGCTRL_TX2LN1_PORT1) #define CRI_TXDEEMPH_OVERRIDE_17_12(x) ((x) << 0) #define CRI_TXDEEMPH_OVERRIDE_17_12_MASK (0x3F << 0) @@ -2049,10 +2049,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MG_TX_DRVCTRL_TX1LN1_TXPORT3 0x16A544 #define MG_TX_DRVCTRL_TX1LN0_TXPORT4 0x16B144 #define MG_TX_DRVCTRL_TX1LN1_TXPORT4 0x16B544 -#define MG_TX1_DRVCTRL(ln, port) \ - MG_PHY_PORT_LN(ln, port, MG_TX_DRVCTRL_TX1LN0_TXPORT1, \ - MG_TX_DRVCTRL_TX1LN0_TXPORT2, \ - MG_TX_DRVCTRL_TX1LN1_TXPORT1) +#define MG_TX1_DRVCTRL(ln, tc_port) \ + MG_PHY_PORT_LN(ln, tc_port, MG_TX_DRVCTRL_TX1LN0_TXPORT1, \ + MG_TX_DRVCTRL_TX1LN0_TXPORT2, \ + MG_TX_DRVCTRL_TX1LN1_TXPORT1) #define MG_TX_DRVCTRL_TX2LN0_PORT1 0x1680C4 #define MG_TX_DRVCTRL_TX2LN1_PORT1 0x1684C4 @@ -2062,10 +2062,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MG_TX_DRVCTRL_TX2LN1_PORT3 0x16A4C4 #define MG_TX_DRVCTRL_TX2LN0_PORT4 0x16B0C4 #define MG_TX_DRVCTRL_TX2LN1_PORT4 0x16B4C4 -#define MG_TX2_DRVCTRL(ln, port) \ - MG_PHY_PORT_LN(ln, port, MG_TX_DRVCTRL_TX2LN0_PORT1, \ - MG_TX_DRVCTRL_TX2LN0_PORT2, \ - MG_TX_DRVCTRL_TX2LN1_PORT1) +#define MG_TX2_DRVCTRL(ln, tc_port) \ + MG_PHY_PORT_LN(ln, tc_port, MG_TX_DRVCTRL_TX2LN0_PORT1, \ + MG_TX_DRVCTRL_TX2LN0_PORT2, \ + MG_TX_DRVCTRL_TX2LN1_PORT1) #define CRI_TXDEEMPH_OVERRIDE_11_6(x) ((x) << 24) #define CRI_TXDEEMPH_OVERRIDE_11_6_MASK (0x3F << 24) #define CRI_TXDEEMPH_OVERRIDE_EN (1 << 22) @@ -2082,10 +2082,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MG_CLKHUB_LN1_PORT3 0x16A79C #define MG_CLKHUB_LN0_PORT4 0x16B39C #define MG_CLKHUB_LN1_PORT4 0x16B79C -#define MG_CLKHUB(ln, port) \ - MG_PHY_PORT_LN(ln, port, MG_CLKHUB_LN0_PORT1, \ - MG_CLKHUB_LN0_PORT2, \ - MG_CLKHUB_LN1_PORT1) +#define MG_CLKHUB(ln, tc_port) \ + MG_PHY_PORT_LN(ln, tc_port, MG_CLKHUB_LN0_PORT1, \ + MG_CLKHUB_LN0_PORT2, \ + MG_CLKHUB_LN1_PORT1) #define CFG_LOW_RATE_LKREN_EN (1 << 11) #define MG_TX_DCC_TX1LN0_PORT1 0x168110 @@ -2096,10 +2096,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MG_TX_DCC_TX1LN1_PORT3 0x16A510 #define MG_TX_DCC_TX1LN0_PORT4 0x16B110 #define MG_TX_DCC_TX1LN1_PORT4 0x16B510 -#define MG_TX1_DCC(ln, port) \ - MG_PHY_PORT_LN(ln, port, MG_TX_DCC_TX1LN0_PORT1, \ - MG_TX_DCC_TX1LN0_PORT2, \ - MG_TX_DCC_TX1LN1_PORT1) +#define MG_TX1_DCC(ln, tc_port) \ + MG_PHY_PORT_LN(ln, tc_port, MG_TX_DCC_TX1LN0_PORT1, \ + MG_TX_DCC_TX1LN0_PORT2, \ + MG_TX_DCC_TX1LN1_PORT1) #define MG_TX_DCC_TX2LN0_PORT1 0x168090 #define MG_TX_DCC_TX2LN1_PORT1 0x168490 #define MG_TX_DCC_TX2LN0_PORT2 0x169090 @@ -2108,10 +2108,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MG_TX_DCC_TX2LN1_PORT3 0x16A490 #define MG_TX_DCC_TX2LN0_PORT4 0x16B090 #define MG_TX_DCC_TX2LN1_PORT4 0x16B490 -#define MG_TX2_DCC(ln, port) \ - MG_PHY_PORT_LN(ln, port, MG_TX_DCC_TX2LN0_PORT1, \ - MG_TX_DCC_TX2LN0_PORT2, \ - MG_TX_DCC_TX2LN1_PORT1) +#define MG_TX2_DCC(ln, tc_port) \ + MG_PHY_PORT_LN(ln, tc_port, MG_TX_DCC_TX2LN0_PORT1, \ + MG_TX_DCC_TX2LN0_PORT2, \ + MG_TX_DCC_TX2LN1_PORT1) #define CFG_AMI_CK_DIV_OVERRIDE_VAL(x) ((x) << 25) #define CFG_AMI_CK_DIV_OVERRIDE_VAL_MASK (0x3 << 25) #define CFG_AMI_CK_DIV_OVERRIDE_EN (1 << 24) @@ -2124,10 +2124,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MG_DP_MODE_LN1_ACU_PORT3 0x16A7A0 #define MG_DP_MODE_LN0_ACU_PORT4 0x16B3A0 #define MG_DP_MODE_LN1_ACU_PORT4 0x16B7A0 -#define MG_DP_MODE(ln, port) \ - MG_PHY_PORT_LN(ln, port, MG_DP_MODE_LN0_ACU_PORT1, \ - MG_DP_MODE_LN0_ACU_PORT2, \ - MG_DP_MODE_LN1_ACU_PORT1) +#define MG_DP_MODE(ln, tc_port) \ + MG_PHY_PORT_LN(ln, tc_port, MG_DP_MODE_LN0_ACU_PORT1, \ + MG_DP_MODE_LN0_ACU_PORT2, \ + MG_DP_MODE_LN1_ACU_PORT1) #define MG_DP_MODE_CFG_DP_X2_MODE (1 << 7) #define MG_DP_MODE_CFG_DP_X1_MODE (1 << 6) #define MG_DP_MODE_CFG_TR2PWR_GATING (1 << 5) From fcde8c7eea6004e92f8b329a84c2a520a1a71bc0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 2 Oct 2019 19:34:59 +0100 Subject: [PATCH 296/331] drm/i915/selftests: Exercise potential false lite-restore If execlists's lite-restore is based on the common GEM context tag rather than the per-intel_context LRCA, then a context switch between two intel_contexts on the same engine derived from the same GEM context will perform a lite-restore instead of a full context switch. We can exploit this by poisoning the ringbuffer of the first context and trying to trick a simple RING_TAIL update (i.e. lite-restore) v2: Also check what happens if preempt ce[0] with ce[1] (both instances on the same engine from the same parent context) [Tvrtko] Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191002183459.26614-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 18 ++- drivers/gpu/drm/i915/gt/selftest_lrc.c | 180 +++++++++++++++++++++++++ 2 files changed, 194 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 9877b6ba13df8..431d3b8c3371f 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -235,6 +235,16 @@ static void execlists_init_reg_state(u32 *reg_state, const struct intel_ring *ring, bool close); +static void __context_pin_acquire(struct intel_context *ce) +{ + mutex_acquire(&ce->pin_mutex.dep_map, 2, 0, _RET_IP_); +} + +static void __context_pin_release(struct intel_context *ce) +{ + mutex_release(&ce->pin_mutex.dep_map, 0, _RET_IP_); +} + static void mark_eio(struct i915_request *rq) { if (!i915_request_signaled(rq)) @@ -2761,7 +2771,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); /* Proclaim we have exclusive access to the context image! */ - mutex_acquire(&ce->pin_mutex.dep_map, 2, 0, _THIS_IP_); + __context_pin_acquire(ce); rq = active_request(rq); if (!rq) { @@ -2825,7 +2835,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) __execlists_reset_reg_state(ce, engine); __execlists_update_reg_state(ce, engine); ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */ - mutex_release(&ce->pin_mutex.dep_map, 0, _THIS_IP_); + __context_pin_release(ce); unwind: /* Push back any incomplete requests for replay after the reset. */ @@ -4439,7 +4449,7 @@ void intel_lr_context_reset(struct intel_engine_cs *engine, bool scrub) { GEM_BUG_ON(!intel_context_is_pinned(ce)); - mutex_acquire(&ce->pin_mutex.dep_map, 2, 0, _THIS_IP_); + __context_pin_acquire(ce); /* * We want a simple context + ring to execute the breadcrumb update. @@ -4465,7 +4475,7 @@ void intel_lr_context_reset(struct intel_engine_cs *engine, intel_ring_update_space(ce->ring); __execlists_update_reg_state(ce, engine); - mutex_release(&ce->pin_mutex.dep_map, 0, _THIS_IP_); + __context_pin_release(ce); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 93f2fcdc49bf8..e7bc2dbbb2a51 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -79,6 +79,184 @@ static int live_sanitycheck(void *arg) return err; } +static int live_unlite_restore(struct drm_i915_private *i915, int prio) +{ + struct intel_engine_cs *engine; + struct i915_gem_context *ctx; + enum intel_engine_id id; + intel_wakeref_t wakeref; + struct igt_spinner spin; + int err = -ENOMEM; + + /* + * Check that we can correctly context switch between 2 instances + * on the same engine from the same parent context. + */ + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(&i915->runtime_pm); + + if (igt_spinner_init(&spin, &i915->gt)) + goto err_unlock; + + ctx = kernel_context(i915); + if (!ctx) + goto err_spin; + + err = 0; + for_each_engine(engine, i915, id) { + struct intel_context *ce[2] = {}; + struct i915_request *rq[2]; + struct igt_live_test t; + int n; + + if (prio && !intel_engine_has_preemption(engine)) + continue; + + if (!intel_engine_can_store_dword(engine)) + continue; + + if (igt_live_test_begin(&t, i915, __func__, engine->name)) { + err = -EIO; + break; + } + + for (n = 0; n < ARRAY_SIZE(ce); n++) { + struct intel_context *tmp; + + tmp = intel_context_create(ctx, engine); + if (IS_ERR(tmp)) { + err = PTR_ERR(tmp); + goto err_ce; + } + + err = intel_context_pin(tmp); + if (err) { + intel_context_put(tmp); + goto err_ce; + } + + /* + * Setup the pair of contexts such that if we + * lite-restore using the RING_TAIL from ce[1] it + * will execute garbage from ce[0]->ring. + */ + memset(tmp->ring->vaddr, + POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */ + tmp->ring->vma->size); + + ce[n] = tmp; + } + GEM_BUG_ON(!ce[1]->ring->size); + intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2); + + local_bh_disable(); /* appease lockdep */ + __context_pin_acquire(ce[1]); + __execlists_update_reg_state(ce[1], engine); + __context_pin_release(ce[1]); + local_bh_enable(); + + rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK); + if (IS_ERR(rq[0])) { + err = PTR_ERR(rq[0]); + goto err_ce; + } + + i915_request_get(rq[0]); + i915_request_add(rq[0]); + GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit); + + if (!igt_wait_for_spinner(&spin, rq[0])) { + i915_request_put(rq[0]); + goto err_ce; + } + + rq[1] = i915_request_create(ce[1]); + if (IS_ERR(rq[1])) { + err = PTR_ERR(rq[1]); + i915_request_put(rq[0]); + goto err_ce; + } + + if (!prio) { + /* + * Ensure we do the switch to ce[1] on completion. + * + * rq[0] is already submitted, so this should reduce + * to a no-op (a wait on a request on the same engine + * uses the submit fence, not the completion fence), + * but it will install a dependency on rq[1] for rq[0] + * that will prevent the pair being reordered by + * timeslicing. + */ + i915_request_await_dma_fence(rq[1], &rq[0]->fence); + } + + i915_request_get(rq[1]); + i915_request_add(rq[1]); + GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix); + i915_request_put(rq[0]); + + if (prio) { + struct i915_sched_attr attr = { + .priority = prio, + }; + + /* Alternatively preempt the spinner with ce[1] */ + engine->schedule(rq[1], &attr); + } + + /* And switch back to ce[0] for good measure */ + rq[0] = i915_request_create(ce[0]); + if (IS_ERR(rq[0])) { + err = PTR_ERR(rq[0]); + i915_request_put(rq[1]); + goto err_ce; + } + + i915_request_await_dma_fence(rq[0], &rq[1]->fence); + i915_request_get(rq[0]); + i915_request_add(rq[0]); + GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix); + i915_request_put(rq[1]); + i915_request_put(rq[0]); + +err_ce: + tasklet_kill(&engine->execlists.tasklet); /* flush submission */ + igt_spinner_end(&spin); + for (n = 0; n < ARRAY_SIZE(ce); n++) { + if (IS_ERR_OR_NULL(ce[n])) + break; + + intel_context_unpin(ce[n]); + intel_context_put(ce[n]); + } + + if (igt_live_test_end(&t)) + err = -EIO; + if (err) + break; + } + + kernel_context_close(ctx); +err_spin: + igt_spinner_fini(&spin); +err_unlock: + intel_runtime_pm_put(&i915->runtime_pm, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int live_unlite_switch(void *arg) +{ + return live_unlite_restore(arg, 0); +} + +static int live_unlite_preempt(void *arg) +{ + return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX)); +} + static int emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx) { @@ -2178,6 +2356,8 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(live_sanitycheck), + SUBTEST(live_unlite_switch), + SUBTEST(live_unlite_preempt), SUBTEST(live_timeslice_preempt), SUBTEST(live_busywait_preempt), SUBTEST(live_preempt), From 0d52cc7e0311c17dfce2249532b6f7073fcac252 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 2 Oct 2019 17:54:04 +0300 Subject: [PATCH 297/331] drm/i915: use DRM_ERROR() instead of drm_err() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify on current common usage to allow repurposing drm_err() later. Fix newlines while at it. Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20191002145405.27848-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 20ec8a1dc5e16..32ca766d9a633 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -12562,10 +12562,10 @@ pipe_config_infoframe_mismatch(struct drm_i915_private *dev_priv, drm_dbg(DRM_UT_KMS, "found"); hdmi_infoframe_log(KERN_DEBUG, dev_priv->drm.dev, b); } else { - drm_err("mismatch in %s infoframe", name); - drm_err("expected:"); + DRM_ERROR("mismatch in %s infoframe\n", name); + DRM_ERROR("expected:\n"); hdmi_infoframe_log(KERN_ERR, dev_priv->drm.dev, a); - drm_err("found"); + DRM_ERROR("found:\n"); hdmi_infoframe_log(KERN_ERR, dev_priv->drm.dev, b); } } @@ -12583,7 +12583,7 @@ pipe_config_mismatch(bool fastset, const char *name, const char *format, ...) if (fastset) drm_dbg(DRM_UT_KMS, "fastset mismatch in %s %pV", name, &vaf); else - drm_err("mismatch in %s %pV", name, &vaf); + DRM_ERROR("mismatch in %s %pV\n", name, &vaf); va_end(args); } From 48c38154d539e3c3fd32b0cdf654b10e5631d18c Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 2 Oct 2019 17:54:05 +0300 Subject: [PATCH 298/331] drm/i915: use DRM_DEBUG_KMS() instead of drm_dbg(DRM_UT_KMS, ...) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify on current common usage to allow repurposing drm_dbg() later. Fix newlines while at it. Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20191002145405.27848-2-jani.nikula@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 32ca766d9a633..c3ac5a5c5185a 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -12556,10 +12556,10 @@ pipe_config_infoframe_mismatch(struct drm_i915_private *dev_priv, if ((drm_debug & DRM_UT_KMS) == 0) return; - drm_dbg(DRM_UT_KMS, "fastset mismatch in %s infoframe", name); - drm_dbg(DRM_UT_KMS, "expected:"); + DRM_DEBUG_KMS("fastset mismatch in %s infoframe\n", name); + DRM_DEBUG_KMS("expected:\n"); hdmi_infoframe_log(KERN_DEBUG, dev_priv->drm.dev, a); - drm_dbg(DRM_UT_KMS, "found"); + DRM_DEBUG_KMS("found:\n"); hdmi_infoframe_log(KERN_DEBUG, dev_priv->drm.dev, b); } else { DRM_ERROR("mismatch in %s infoframe\n", name); @@ -12581,7 +12581,7 @@ pipe_config_mismatch(bool fastset, const char *name, const char *format, ...) vaf.va = &args; if (fastset) - drm_dbg(DRM_UT_KMS, "fastset mismatch in %s %pV", name, &vaf); + DRM_DEBUG_KMS("fastset mismatch in %s %pV\n", name, &vaf); else DRM_ERROR("mismatch in %s %pV\n", name, &vaf); From 3032c0b47e5f71a9a3c4c8e7bfa83d9005814510 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 2 Oct 2019 23:41:08 +0300 Subject: [PATCH 299/331] drm/i915/tgl: Add the Thunderbolt PLL divider values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Thunderbolt PLL divider values on TGL differ from the ICL ones, update the PLL parameter calculation function accordingly. Bspec: 49204 v2: - Remove unused refclk config. (José) Cc: Jose Souza Cc: Clinton A Taylor Cc: Lucas De Marchi Cc: Mika Westerberg Tested-by: Mika Westerberg Signed-off-by: Imre Deak Reviewed-by: Jose Souza Link: https://patchwork.freedesktop.org/patch/msgid/20191002204108.32242-1-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 42 ++++++++++++++++++- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index be69a23442940..5e9e84c94a15a 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -2520,6 +2520,18 @@ static const struct skl_wrpll_params icl_tbt_pll_19_2MHz_values = { .pdiv = 0x4 /* 5 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0, }; +static const struct skl_wrpll_params tgl_tbt_pll_19_2MHz_values = { + .dco_integer = 0x54, .dco_fraction = 0x3000, + /* the following params are unused */ + .pdiv = 0, .kdiv = 0, .qdiv_mode = 0, .qdiv_ratio = 0, +}; + +static const struct skl_wrpll_params tgl_tbt_pll_24MHz_values = { + .dco_integer = 0x43, .dco_fraction = 0x4000, + /* the following params are unused */ + .pdiv = 0, .kdiv = 0, .qdiv_mode = 0, .qdiv_ratio = 0, +}; + static bool icl_calc_dp_combo_pll(struct intel_crtc_state *crtc_state, struct skl_wrpll_params *pll_params) { @@ -2547,8 +2559,34 @@ static bool icl_calc_tbt_pll(struct intel_crtc_state *crtc_state, { struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); - *pll_params = dev_priv->cdclk.hw.ref == 24000 ? - icl_tbt_pll_24MHz_values : icl_tbt_pll_19_2MHz_values; + if (INTEL_GEN(dev_priv) >= 12) { + switch (dev_priv->cdclk.hw.ref) { + default: + MISSING_CASE(dev_priv->cdclk.hw.ref); + /* fall-through */ + case 19200: + case 38400: + *pll_params = tgl_tbt_pll_19_2MHz_values; + break; + case 24000: + *pll_params = tgl_tbt_pll_24MHz_values; + break; + } + } else { + switch (dev_priv->cdclk.hw.ref) { + default: + MISSING_CASE(dev_priv->cdclk.hw.ref); + /* fall-through */ + case 19200: + case 38400: + *pll_params = icl_tbt_pll_19_2MHz_values; + break; + case 24000: + *pll_params = icl_tbt_pll_24MHz_values; + break; + } + } + return true; } From 44d0a9c05bc078aef75229d0750e9c55959caae4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 3 Oct 2019 22:00:56 +0100 Subject: [PATCH 300/331] drm/i915/execlists: Skip redundant resubmission If we unwind the active requests, and on resubmission discover that we intend to preempt the active contexts with themselves, simply skip the ELSP submission. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191003210100.22250-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 431d3b8c3371f..3cfea1758fd2f 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1739,11 +1739,26 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (submit) { *port = execlists_schedule_in(last, port - execlists->pending); - memset(port + 1, 0, (last_port - port) * sizeof(*port)); execlists->switch_priority_hint = switch_prio(engine, *execlists->pending); + + /* + * Skip if we ended up with exactly the same set of requests, + * e.g. trying to timeslice a pair of ordered contexts + */ + if (!memcmp(execlists->active, execlists->pending, + (port - execlists->pending + 1) * sizeof(*port))) { + do + execlists_schedule_out(fetch_and_zero(port)); + while (port-- != execlists->pending); + + goto skip_submit; + } + + memset(port + 1, 0, (last_port - port) * sizeof(*port)); execlists_submit_ports(engine); } else { +skip_submit: ring_set_paused(engine, 0); } } From 81cdeca45a061f0ec9ecbbf7de864e2a57df5ee4 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 2 Oct 2019 17:41:38 +0300 Subject: [PATCH 301/331] drm/i915/dp: remove static variable for aux last status MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add aux_busy_last_status to intel_dp. Don't bother with initializing to all ones; the only difference is potentially missing logging for one error case if the readout is all zeros. Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20191002144138.7917-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/display/intel_display_types.h | 1 + drivers/gpu/drm/i915/display/intel_dp.c | 5 ++--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 976669f01a8c3..1602aac7ca0fe 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -1185,6 +1185,7 @@ struct intel_dp { /* sink or branch descriptor */ struct drm_dp_desc desc; struct drm_dp_aux aux; + u32 aux_busy_last_status; u8 train_set[4]; int panel_power_up_delay; int panel_power_down_delay; diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index b788e332e76a7..38aa09cfbed3e 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -1347,13 +1347,12 @@ intel_dp_aux_xfer(struct intel_dp *intel_dp, trace_i915_reg_rw(false, ch_ctl, status, sizeof(status), true); if (try == 3) { - static u32 last_status = -1; const u32 status = intel_uncore_read(uncore, ch_ctl); - if (status != last_status) { + if (status != intel_dp->aux_busy_last_status) { WARN(1, "dp_aux_ch not started status 0x%08x\n", status); - last_status = status; + intel_dp->aux_busy_last_status = status; } ret = -EBUSY; From 1580d3cdddbba4a5ef78a04a5289e32844e6af24 Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Thu, 3 Oct 2019 11:55:30 +0300 Subject: [PATCH 302/331] drm/i915: Fix audio power up sequence for gen10+ display On platfroms with gen10+ display, driver must set the enable bit of AUDIO_PIN_BUF_CTL register before transactions with the HDA controller can proceed. Add setting this bit to the audio power up sequence. Failing to do this resulted in errors during display audio codec probe, and failures during resume from suspend. Note: We may also need to disable the bit afterwards, but there are still unresolved issues with that. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111214 Signed-off-by: Kai Vehmanen Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20191003085531.30990-1-kai.vehmanen@linux.intel.com --- drivers/gpu/drm/i915/display/intel_audio.c | 5 +++++ drivers/gpu/drm/i915/i915_reg.h | 2 ++ 2 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c index 54638d99e0217..e93776710abce 100644 --- a/drivers/gpu/drm/i915/display/intel_audio.c +++ b/drivers/gpu/drm/i915/display/intel_audio.c @@ -862,6 +862,11 @@ static unsigned long i915_audio_component_get_power(struct device *kdev) /* Force CDCLK to 2*BCLK as long as we need audio powered. */ if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) glk_force_audio_cdclk(dev_priv, true); + + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + I915_WRITE(AUD_PIN_BUF_CTL, + (I915_READ(AUD_PIN_BUF_CTL) | + AUD_PIN_BUF_ENABLE)); } return ret; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index eefd789b9a282..813ddea3f9f1f 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -9133,6 +9133,8 @@ enum { #define SKL_AUD_CODEC_WAKE_SIGNAL (1 << 15) #define AUD_FREQ_CNTRL _MMIO(0x65900) +#define AUD_PIN_BUF_CTL _MMIO(0x48414) +#define AUD_PIN_BUF_ENABLE REG_BIT(31) /* * HSW - ICL power wells From f6ec9483091f8e67adab0311a4e2f90aab523310 Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Thu, 3 Oct 2019 11:55:31 +0300 Subject: [PATCH 303/331] drm/i915: extend audio CDCLK>=2*BCLK constraint to more platforms The CDCLK>=2*BCLK constraint applies to all generations since gen10. Extend the constraint logic in audio get/put_power(). Signed-off-by: Kai Vehmanen Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20191003085531.30990-2-kai.vehmanen@linux.intel.com --- drivers/gpu/drm/i915/display/intel_audio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c index e93776710abce..ed18511befa37 100644 --- a/drivers/gpu/drm/i915/display/intel_audio.c +++ b/drivers/gpu/drm/i915/display/intel_audio.c @@ -860,7 +860,7 @@ static unsigned long i915_audio_component_get_power(struct device *kdev) } /* Force CDCLK to 2*BCLK as long as we need audio powered. */ - if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) glk_force_audio_cdclk(dev_priv, true); if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) @@ -879,7 +879,7 @@ static void i915_audio_component_put_power(struct device *kdev, /* Stop forcing CDCLK to 2*BCLK if no need for audio to be powered. */ if (--dev_priv->audio_power_refcount == 0) - if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) glk_force_audio_cdclk(dev_priv, false); intel_display_power_put(dev_priv, POWER_DOMAIN_AUDIO, cookie); From 261ea7e29ed1e6a172d4ba829f4d5e3706ab29e4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 4 Oct 2019 11:59:58 +0100 Subject: [PATCH 304/331] drm/i915: Restrict L3 remapping sysfs interface to dwords The L3 cache remapping is stored as u32 elements, and we should ensure that the user only supplies complete slice information(u32). Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191004105958.1741-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_sysfs.c | 57 ++++++++++++++++--------------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 034b8abc50624..1e08c5961535b 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -144,12 +144,12 @@ static const struct attribute_group media_rc6_attr_group = { }; #endif -static int l3_access_valid(struct drm_i915_private *dev_priv, loff_t offset) +static int l3_access_valid(struct drm_i915_private *i915, loff_t offset) { - if (!HAS_L3_DPF(dev_priv)) + if (!HAS_L3_DPF(i915)) return -EPERM; - if (offset % 4 != 0) + if (!IS_ALIGNED(offset, sizeof(u32))) return -EINVAL; if (offset >= GEN7_L3LOG_SIZE) @@ -164,31 +164,28 @@ i915_l3_read(struct file *filp, struct kobject *kobj, loff_t offset, size_t count) { struct device *kdev = kobj_to_dev(kobj); - struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct drm_device *dev = &dev_priv->drm; + struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); int slice = (int)(uintptr_t)attr->private; int ret; - count = round_down(count, 4); - - ret = l3_access_valid(dev_priv, offset); + ret = l3_access_valid(i915, offset); if (ret) return ret; + count = round_down(count, sizeof(u32)); count = min_t(size_t, GEN7_L3LOG_SIZE - offset, count); + memset(buf, 0, count); - ret = i915_mutex_lock_interruptible(dev); + ret = i915_mutex_lock_interruptible(&i915->drm); if (ret) return ret; - if (dev_priv->l3_parity.remap_info[slice]) + if (i915->l3_parity.remap_info[slice]) memcpy(buf, - dev_priv->l3_parity.remap_info[slice] + (offset/4), + i915->l3_parity.remap_info[slice] + offset / sizeof(u32), count); - else - memset(buf, 0, count); - mutex_unlock(&dev->struct_mutex); + mutex_unlock(&i915->drm.struct_mutex); return count; } @@ -199,22 +196,24 @@ i915_l3_write(struct file *filp, struct kobject *kobj, loff_t offset, size_t count) { struct device *kdev = kobj_to_dev(kobj); - struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct drm_device *dev = &dev_priv->drm; - struct i915_gem_context *ctx; + struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); int slice = (int)(uintptr_t)attr->private; + struct i915_gem_context *ctx; u32 **remap_info; int ret; - ret = l3_access_valid(dev_priv, offset); + ret = l3_access_valid(i915, offset); if (ret) return ret; - ret = i915_mutex_lock_interruptible(dev); + if (count < sizeof(u32)) + return -EINVAL; + + ret = i915_mutex_lock_interruptible(&i915->drm); if (ret) return ret; - remap_info = &dev_priv->l3_parity.remap_info[slice]; + remap_info = &i915->l3_parity.remap_info[slice]; if (!*remap_info) { *remap_info = kzalloc(GEN7_L3LOG_SIZE, GFP_KERNEL); if (!*remap_info) { @@ -223,20 +222,22 @@ i915_l3_write(struct file *filp, struct kobject *kobj, } } - /* TODO: Ideally we really want a GPU reset here to make sure errors + count = round_down(count, sizeof(u32)); + memcpy(*remap_info + offset / sizeof(u32), buf, count); + + /* NB: We defer the remapping until we switch to the context */ + list_for_each_entry(ctx, &i915->contexts.list, link) + ctx->remap_slice |= BIT(slice); + + /* + * TODO: Ideally we really want a GPU reset here to make sure errors * aren't propagated. Since I cannot find a stable way to reset the GPU * at this point it is left as a TODO. */ - memcpy(*remap_info + (offset/4), buf, count); - - /* NB: We defer the remapping until we switch to the context */ - list_for_each_entry(ctx, &dev_priv->contexts.list, link) - ctx->remap_slice |= (1<struct_mutex); + mutex_unlock(&i915->drm.struct_mutex); return ret; } From b290a78b5c3d78c337b8d9856cc0671aea0c2ef3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 3 Oct 2019 22:00:58 +0100 Subject: [PATCH 305/331] drm/i915: Use helpers for drm_mm_node booleans A subset of 71724f708997 ("drm/mm: Use helpers for drm_mm_node booleans") in order to prepare drm-intel-next-queued for subsequent patches before we can backmerge 71724f708997 itself. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191004142226.13711-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 4 ++-- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 2 +- drivers/gpu/drm/i915/i915_gem.c | 12 ++++++------ drivers/gpu/drm/i915/i915_gem_evict.c | 2 +- drivers/gpu/drm/i915/i915_vma.c | 2 +- drivers/gpu/drm/i915/i915_vma.h | 4 ++-- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 8fbb454cfd6bd..228ce24ea2806 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -968,7 +968,7 @@ static void reloc_cache_reset(struct reloc_cache *cache) intel_gt_flush_ggtt_writes(ggtt->vm.gt); io_mapping_unmap_atomic((void __iomem *)vaddr); - if (cache->node.allocated) { + if (drm_mm_node_allocated(&cache->node)) { ggtt->vm.clear_range(&ggtt->vm, cache->node.start, cache->node.size); @@ -1061,7 +1061,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, } offset = cache->node.start; - if (cache->node.allocated) { + if (drm_mm_node_allocated(&cache->node)) { ggtt->vm.insert_page(&ggtt->vm, i915_gem_object_get_dma_address(obj, page), offset, I915_CACHE_NONE, 0); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index bb878119f06ca..bb4889d2346d8 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -387,7 +387,7 @@ static u32 uc_fw_ggtt_offset(struct intel_uc_fw *uc_fw, struct i915_ggtt *ggtt) { struct drm_mm_node *node = &ggtt->uc_fw; - GEM_BUG_ON(!node->allocated); + GEM_BUG_ON(!drm_mm_node_allocated(node)); GEM_BUG_ON(upper_32_bits(node->start)); GEM_BUG_ON(upper_32_bits(node->start + node->size - 1)); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1426e506700d1..fa8e028ac0b56 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -356,7 +356,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); if (ret) goto out_unlock; - GEM_BUG_ON(!node.allocated); + GEM_BUG_ON(!drm_mm_node_allocated(&node)); } mutex_unlock(&i915->drm.struct_mutex); @@ -393,7 +393,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, unsigned page_offset = offset_in_page(offset); unsigned page_length = PAGE_SIZE - page_offset; page_length = remain < page_length ? remain : page_length; - if (node.allocated) { + if (drm_mm_node_allocated(&node)) { ggtt->vm.insert_page(&ggtt->vm, i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), node.start, I915_CACHE_NONE, 0); @@ -415,7 +415,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, i915_gem_object_unlock_fence(obj, fence); out_unpin: mutex_lock(&i915->drm.struct_mutex); - if (node.allocated) { + if (drm_mm_node_allocated(&node)) { ggtt->vm.clear_range(&ggtt->vm, node.start, node.size); remove_mappable_node(&node); } else { @@ -566,7 +566,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); if (ret) goto out_rpm; - GEM_BUG_ON(!node.allocated); + GEM_BUG_ON(!drm_mm_node_allocated(&node)); } mutex_unlock(&i915->drm.struct_mutex); @@ -604,7 +604,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, unsigned int page_offset = offset_in_page(offset); unsigned int page_length = PAGE_SIZE - page_offset; page_length = remain < page_length ? remain : page_length; - if (node.allocated) { + if (drm_mm_node_allocated(&node)) { /* flush the write before we modify the GGTT */ intel_gt_flush_ggtt_writes(ggtt->vm.gt); ggtt->vm.insert_page(&ggtt->vm, @@ -636,7 +636,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, out_unpin: mutex_lock(&i915->drm.struct_mutex); intel_gt_flush_ggtt_writes(ggtt->vm.gt); - if (node.allocated) { + if (drm_mm_node_allocated(&node)) { ggtt->vm.clear_range(&ggtt->vm, node.start, node.size); remove_mappable_node(&node); } else { diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index e76c9da9992db..8c1e04f402bcc 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -299,7 +299,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, break; } - GEM_BUG_ON(!node->allocated); + GEM_BUG_ON(!drm_mm_node_allocated(node)); vma = container_of(node, typeof(*vma), node); /* If we are using coloring to insert guard pages between diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 9d5b0f87c2100..68c34b1a20e48 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -795,7 +795,7 @@ void i915_vma_reopen(struct i915_vma *vma) static void __i915_vma_destroy(struct i915_vma *vma) { - GEM_BUG_ON(vma->node.allocated); + GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(vma->fence); mutex_lock(&vma->vm->mutex); diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 8bcb5812c4467..e49b199f7de7a 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -228,7 +228,7 @@ static inline bool i915_vma_is_closed(const struct i915_vma *vma) static inline u32 i915_ggtt_offset(const struct i915_vma *vma) { GEM_BUG_ON(!i915_vma_is_ggtt(vma)); - GEM_BUG_ON(!vma->node.allocated); + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(upper_32_bits(vma->node.start)); GEM_BUG_ON(upper_32_bits(vma->node.start + vma->node.size - 1)); return lower_32_bits(vma->node.start); @@ -390,7 +390,7 @@ static inline bool i915_vma_is_bound(const struct i915_vma *vma, static inline bool i915_node_color_differs(const struct drm_mm_node *node, unsigned long color) { - return node->allocated && node->color != color; + return drm_mm_node_allocated(node) && node->color != color; } /** From 5e053450c1c3af316aef9a96dc88f52cd803d1ee Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 4 Oct 2019 14:39:56 +0100 Subject: [PATCH 306/331] drm/i915: Only track bound elements of the GTT The premise here is to simply avoiding having to acquire the vm->mutex inside vma create/destroy to update the vm->unbound_lists, to avoid some nasty lock recursions later. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 2 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 23 ++++--------------- drivers/gpu/drm/i915/i915_gem_gtt.h | 5 ---- drivers/gpu/drm/i915/i915_vma.c | 12 ++-------- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 2 +- 5 files changed, 8 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index bfbc3e3daf924..e45eb87218506 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -692,7 +692,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv __i915_vma_set_map_and_fenceable(vma); mutex_lock(&ggtt->vm.mutex); - list_move_tail(&vma->vm_link, &ggtt->vm.bound_list); + list_add_tail(&vma->vm_link, &ggtt->vm.bound_list); mutex_unlock(&ggtt->vm.mutex); GEM_BUG_ON(i915_gem_object_is_shrinkable(obj)); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index e62e9d1a13075..ad9eb2d68f3f3 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -505,19 +505,12 @@ static void i915_address_space_fini(struct i915_address_space *vm) static void ppgtt_destroy_vma(struct i915_address_space *vm) { - struct list_head *phases[] = { - &vm->bound_list, - &vm->unbound_list, - NULL, - }, **phase; + struct i915_vma *vma, *vn; mutex_lock(&vm->i915->drm.struct_mutex); - for (phase = phases; *phase; phase++) { - struct i915_vma *vma, *vn; - - list_for_each_entry_safe(vma, vn, *phase, vm_link) - i915_vma_destroy(vma); - } + list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) + i915_vma_destroy(vma); + GEM_BUG_ON(!list_empty(&vm->bound_list)); mutex_unlock(&vm->i915->drm.struct_mutex); } @@ -528,9 +521,6 @@ static void __i915_vm_release(struct work_struct *work) ppgtt_destroy_vma(vm); - GEM_BUG_ON(!list_empty(&vm->bound_list)); - GEM_BUG_ON(!list_empty(&vm->unbound_list)); - vm->cleanup(vm); i915_address_space_fini(vm); @@ -569,7 +559,6 @@ static void i915_address_space_init(struct i915_address_space *vm, int subclass) stash_init(&vm->free_pages); - INIT_LIST_HEAD(&vm->unbound_list); INIT_LIST_HEAD(&vm->bound_list); } @@ -1887,10 +1876,6 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size) INIT_LIST_HEAD(&vma->obj_link); INIT_LIST_HEAD(&vma->closed_link); - mutex_lock(&vma->vm->mutex); - list_add(&vma->vm_link, &vma->vm->unbound_list); - mutex_unlock(&vma->vm->mutex); - return vma; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 8fd2234ba0bf4..bbdc735466c14 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -320,11 +320,6 @@ struct i915_address_space { */ struct list_head bound_list; - /** - * List of vma that are not unbound. - */ - struct list_head unbound_list; - struct pagestash free_pages; /* Global GTT */ diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 68c34b1a20e48..d097f77890ba8 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -218,10 +218,6 @@ vma_create(struct drm_i915_gem_object *obj, spin_unlock(&obj->vma.lock); - mutex_lock(&vm->mutex); - list_add(&vma->vm_link, &vm->unbound_list); - mutex_unlock(&vm->mutex); - return vma; err_vma: @@ -657,7 +653,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, color)); mutex_lock(&vma->vm->mutex); - list_move_tail(&vma->vm_link, &vma->vm->bound_list); + list_add_tail(&vma->vm_link, &vma->vm->bound_list); mutex_unlock(&vma->vm->mutex); if (vma->obj) { @@ -685,7 +681,7 @@ i915_vma_remove(struct i915_vma *vma) mutex_lock(&vma->vm->mutex); drm_mm_remove_node(&vma->node); - list_move_tail(&vma->vm_link, &vma->vm->unbound_list); + list_del(&vma->vm_link); mutex_unlock(&vma->vm->mutex); /* @@ -798,10 +794,6 @@ static void __i915_vma_destroy(struct i915_vma *vma) GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(vma->fence); - mutex_lock(&vma->vm->mutex); - list_del(&vma->vm_link); - mutex_unlock(&vma->vm->mutex); - if (vma->obj) { struct drm_i915_gem_object *obj = vma->obj; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index f901bbb9586e0..0945d6e978a2e 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -1242,7 +1242,7 @@ static void track_vma_bind(struct i915_vma *vma) vma->pages = obj->mm.pages; mutex_lock(&vma->vm->mutex); - list_move_tail(&vma->vm_link, &vma->vm->bound_list); + list_add_tail(&vma->vm_link, &vma->vm->bound_list); mutex_unlock(&vma->vm->mutex); } From 11331125e1480ff786be9d2051301401b652bbe1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 4 Oct 2019 14:39:57 +0100 Subject: [PATCH 307/331] drm/i915: Mark up address spaces that may need to allocate Since we cannot allocate underneath the vm->mutex (it is used in the direct-reclaim paths), we need to shift the allocations off into a mutexless worker with fence recursion prevention. To know when we need this protection, we mark up the address spaces that do allocate before insertion. In the future, we may wish to extend the async bind scheme to more than just allocations. v2: s/vm->bind_alloc/vm->bind_async_flags/ Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 3 +++ drivers/gpu/drm/i915/i915_gem_gtt.h | 2 ++ 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index ad9eb2d68f3f3..8eba63ecdb03c 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1502,6 +1502,7 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915) goto err_free_pd; } + ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND; ppgtt->vm.insert_entries = gen8_ppgtt_insert; ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc; ppgtt->vm.clear_range = gen8_ppgtt_clear; @@ -1950,6 +1951,7 @@ static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915) ppgtt_init(&ppgtt->base, &i915->gt); ppgtt->base.vm.top = 1; + ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND; ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range; ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range; ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries; @@ -2581,6 +2583,7 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt) goto err_ppgtt; ggtt->alias = ppgtt; + ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags; GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma); ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index bbdc735466c14..3502b9c85a8ef 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -305,6 +305,8 @@ struct i915_address_space { u64 total; /* size addr space maps (ex. 2GB for ggtt) */ u64 reserved; /* size addr space reserved */ + unsigned int bind_async_flags; + bool closed; struct mutex mutex; /* protects vma and our lists */ From 2850748ef8763ab46958e43a4d1c445f29eeb37d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 4 Oct 2019 14:39:58 +0100 Subject: [PATCH 308/331] drm/i915: Pull i915_vma_pin under the vm->mutex Replace the struct_mutex requirement for pinning the i915_vma with the local vm->mutex instead. Note that the vm->mutex is tainted by the shrinker (we require unbinding from inside fs-reclaim) and so we cannot allocate while holding that mutex. Instead we have to preallocate workers to do allocate and apply the PTE updates after we have we reserved their slot in the drm_mm (using fences to order the PTE writes with the GPU work and with later unbind). In adding the asynchronous vma binding, one subtle requirement is to avoid coupling the binding fence into the backing object->resv. That is the asynchronous binding only applies to the vma timeline itself and not to the pages as that is a more global timeline (the binding of one vma does not need to be ordered with another vma, nor does the implicit GEM fencing depend on a vma, only on writes to the backing store). Keeping the vma binding distinct from the backing store timelines is verified by a number of async gem_exec_fence and gem_exec_schedule tests. The way we do this is quite simple, we keep the fence for the vma binding separate and only wait on it as required, and never add it to the obj->resv itself. Another consequence in reducing the locking around the vma is the destruction of the vma is no longer globally serialised by struct_mutex. A natural solution would be to add a kref to i915_vma, but that requires decoupling the reference cycles, possibly by introducing a new i915_mm_pages object that is own by both obj->mm and vma->pages. However, we have not taken that route due to the overshadowing lmem/ttm discussions, and instead play a series of complicated games with trylocks to (hopefully) ensure that only one destruction path is called! v2: Add some commentary, and some helpers to reduce patch churn. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/display/intel_display.c | 29 +- drivers/gpu/drm/i915/display/intel_dsb.c | 7 +- drivers/gpu/drm/i915/display/intel_fbdev.c | 8 +- drivers/gpu/drm/i915/display/intel_overlay.c | 11 +- .../gpu/drm/i915/gem/i915_gem_client_blt.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_context.c | 20 +- drivers/gpu/drm/i915/gem/i915_gem_domain.c | 13 +- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 43 +- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 20 +- drivers/gpu/drm/i915/gem/i915_gem_object.c | 33 +- drivers/gpu/drm/i915/gem/i915_gem_object.h | 5 + drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 73 +-- drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 8 +- drivers/gpu/drm/i915/gem/i915_gem_tiling.c | 27 +- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 27 +- .../gpu/drm/i915/gem/selftests/huge_pages.c | 9 +- .../drm/i915/gem/selftests/i915_gem_context.c | 12 +- .../drm/i915/gem/selftests/i915_gem_mman.c | 4 - .../drm/i915/gem/selftests/igt_gem_utils.c | 7 +- drivers/gpu/drm/i915/gt/intel_gt.c | 5 +- drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 4 +- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 19 +- drivers/gpu/drm/i915/gvt/aperture_gm.c | 12 +- drivers/gpu/drm/i915/i915_active.c | 95 +++- drivers/gpu/drm/i915/i915_active.h | 7 + drivers/gpu/drm/i915/i915_active_types.h | 5 + drivers/gpu/drm/i915/i915_drv.c | 2 - drivers/gpu/drm/i915/i915_gem.c | 83 ++- drivers/gpu/drm/i915/i915_gem_evict.c | 28 +- drivers/gpu/drm/i915/i915_gem_fence_reg.c | 9 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 109 ++-- drivers/gpu/drm/i915/i915_gem_gtt.h | 45 +- drivers/gpu/drm/i915/i915_perf.c | 32 +- drivers/gpu/drm/i915/i915_vma.c | 524 ++++++++++++------ drivers/gpu/drm/i915/i915_vma.h | 84 +-- drivers/gpu/drm/i915/selftests/i915_gem.c | 2 - .../gpu/drm/i915/selftests/i915_gem_evict.c | 36 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 58 +- drivers/gpu/drm/i915/selftests/i915_request.c | 7 + drivers/gpu/drm/i915/selftests/i915_vma.c | 6 +- 40 files changed, 824 insertions(+), 706 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index c3ac5a5c5185a..8f7365b8dffb7 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -2079,7 +2079,6 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int pinctl; u32 alignment; - WARN_ON(!mutex_is_locked(&dev->struct_mutex)); if (WARN_ON(!i915_gem_object_is_framebuffer(obj))) return ERR_PTR(-EINVAL); @@ -2163,8 +2162,6 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags) { - lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); - i915_gem_object_lock(vma->obj); if (flags & PLANE_HAS_FENCE) i915_vma_unpin_fence(vma); @@ -3065,12 +3062,10 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, return false; } - mutex_lock(&dev->struct_mutex); obj = i915_gem_object_create_stolen_for_preallocated(dev_priv, base_aligned, base_aligned, size_aligned); - mutex_unlock(&dev->struct_mutex); if (!obj) return false; @@ -3232,13 +3227,11 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, intel_state->color_plane[0].stride = intel_fb_pitch(fb, 0, intel_state->base.rotation); - mutex_lock(&dev->struct_mutex); intel_state->vma = intel_pin_and_fence_fb_obj(fb, &intel_state->view, intel_plane_uses_fence(intel_state), &intel_state->flags); - mutex_unlock(&dev->struct_mutex); if (IS_ERR(intel_state->vma)) { DRM_ERROR("failed to pin boot fb on pipe %d: %li\n", intel_crtc->pipe, PTR_ERR(intel_state->vma)); @@ -14365,8 +14358,6 @@ static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj) * bits. Some older platforms need special physical address handling for * cursor planes. * - * Must be called with struct_mutex held. - * * Returns 0 on success, negative error code on failure. */ int @@ -14423,15 +14414,8 @@ intel_prepare_plane_fb(struct drm_plane *plane, if (ret) return ret; - ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex); - if (ret) { - i915_gem_object_unpin_pages(obj); - return ret; - } - ret = intel_plane_pin_fb(to_intel_plane_state(new_state)); - mutex_unlock(&dev_priv->drm.struct_mutex); i915_gem_object_unpin_pages(obj); if (ret) return ret; @@ -14480,8 +14464,6 @@ intel_prepare_plane_fb(struct drm_plane *plane, * @old_state: the state from the previous modeset * * Cleans up a framebuffer that has just been removed from a plane. - * - * Must be called with struct_mutex held. */ void intel_cleanup_plane_fb(struct drm_plane *plane, @@ -14497,9 +14479,7 @@ intel_cleanup_plane_fb(struct drm_plane *plane, } /* Should only be called after a successful intel_prepare_plane_fb()! */ - mutex_lock(&dev_priv->drm.struct_mutex); intel_plane_unpin_fb(to_intel_plane_state(old_state)); - mutex_unlock(&dev_priv->drm.struct_mutex); } int @@ -14702,7 +14682,6 @@ intel_legacy_cursor_update(struct drm_plane *plane, u32 src_w, u32 src_h, struct drm_modeset_acquire_ctx *ctx) { - struct drm_i915_private *dev_priv = to_i915(crtc->dev); struct drm_plane_state *old_plane_state, *new_plane_state; struct intel_plane *intel_plane = to_intel_plane(plane); struct intel_crtc_state *crtc_state = @@ -14768,13 +14747,9 @@ intel_legacy_cursor_update(struct drm_plane *plane, if (ret) goto out_free; - ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex); - if (ret) - goto out_free; - ret = intel_plane_pin_fb(to_intel_plane_state(new_plane_state)); if (ret) - goto out_unlock; + goto out_free; intel_frontbuffer_flush(to_intel_frontbuffer(fb), ORIGIN_FLIP); intel_frontbuffer_track(to_intel_frontbuffer(old_plane_state->fb), @@ -14804,8 +14779,6 @@ intel_legacy_cursor_update(struct drm_plane *plane, intel_plane_unpin_fb(to_intel_plane_state(old_plane_state)); -out_unlock: - mutex_unlock(&dev_priv->drm.struct_mutex); out_free: if (new_crtc_state) intel_crtc_destroy_state(crtc, &new_crtc_state->base); diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index 0a0a1536ac96e..bb5a0e91b370a 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -119,9 +119,7 @@ intel_dsb_get(struct intel_crtc *crtc) goto err; } - mutex_lock(&i915->drm.struct_mutex); vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); - mutex_unlock(&i915->drm.struct_mutex); if (IS_ERR(vma)) { DRM_ERROR("Vma creation failed\n"); i915_gem_object_put(obj); @@ -164,10 +162,7 @@ void intel_dsb_put(struct intel_dsb *dsb) return; if (atomic_dec_and_test(&dsb->refcount)) { - mutex_lock(&i915->drm.struct_mutex); - i915_gem_object_unpin_map(dsb->vma->obj); - i915_vma_unpin_and_release(&dsb->vma, 0); - mutex_unlock(&i915->drm.struct_mutex); + i915_vma_unpin_and_release(&dsb->vma, I915_VMA_RELEASE_MAP); dsb->cmd_buf = NULL; dsb->free_pos = 0; dsb->ins_start_offset = 0; diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index 68338669f0542..97cde017670a1 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -204,7 +204,6 @@ static int intelfb_create(struct drm_fb_helper *helper, sizes->fb_height = intel_fb->base.height; } - mutex_lock(&dev->struct_mutex); wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); /* Pin the GGTT vma for our access via info->screen_base. @@ -266,7 +265,6 @@ static int intelfb_create(struct drm_fb_helper *helper, ifbdev->vma_flags = flags; intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); - mutex_unlock(&dev->struct_mutex); vga_switcheroo_client_fb_set(pdev, info); return 0; @@ -274,7 +272,6 @@ static int intelfb_create(struct drm_fb_helper *helper, intel_unpin_fb_vma(vma, flags); out_unlock: intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); - mutex_unlock(&dev->struct_mutex); return ret; } @@ -291,11 +288,8 @@ static void intel_fbdev_destroy(struct intel_fbdev *ifbdev) drm_fb_helper_fini(&ifbdev->helper); - if (ifbdev->vma) { - mutex_lock(&ifbdev->helper.dev->struct_mutex); + if (ifbdev->vma) intel_unpin_fb_vma(ifbdev->vma, ifbdev->vma_flags); - mutex_unlock(&ifbdev->helper.dev->struct_mutex); - } if (ifbdev->fb) drm_framebuffer_remove(&ifbdev->fb->base); diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index 5efef9babadb1..3f4ac1ee76684 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -1303,15 +1303,11 @@ static int get_registers(struct intel_overlay *overlay, bool use_phys) struct i915_vma *vma; int err; - mutex_lock(&i915->drm.struct_mutex); - obj = i915_gem_object_create_stolen(i915, PAGE_SIZE); if (obj == NULL) obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto err_unlock; - } + if (IS_ERR(obj)) + return PTR_ERR(obj); vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); if (IS_ERR(vma)) { @@ -1332,13 +1328,10 @@ static int get_registers(struct intel_overlay *overlay, bool use_phys) } overlay->reg_bo = obj; - mutex_unlock(&i915->drm.struct_mutex); return 0; err_put_bo: i915_gem_object_put(obj); -err_unlock: - mutex_unlock(&i915->drm.struct_mutex); return err; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c index 7f61a80241334..c1fca5728e6ec 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c @@ -211,7 +211,7 @@ static void clear_pages_worker(struct work_struct *work) * keep track of the GPU activity within this vma/request, and * propagate the signal from the request to w->dma. */ - err = i915_active_add_request(&vma->active, rq); + err = __i915_vma_move_to_active(vma, rq); if (err) goto out_request; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index f7ba0935ed670..95f8e66e45db0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -313,8 +313,6 @@ static void i915_gem_context_free(struct i915_gem_context *ctx) GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); release_hw_id(ctx); - if (ctx->vm) - i915_vm_put(ctx->vm); free_engines(rcu_access_pointer(ctx->engines)); mutex_destroy(&ctx->engines_mutex); @@ -379,9 +377,13 @@ void i915_gem_context_release(struct kref *ref) static void context_close(struct i915_gem_context *ctx) { + i915_gem_context_set_closed(ctx); + + if (ctx->vm) + i915_vm_close(ctx->vm); + mutex_lock(&ctx->mutex); - i915_gem_context_set_closed(ctx); ctx->file_priv = ERR_PTR(-EBADF); /* @@ -474,7 +476,7 @@ __set_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm) GEM_BUG_ON(old && i915_vm_is_4lvl(vm) != i915_vm_is_4lvl(old)); - ctx->vm = i915_vm_get(vm); + ctx->vm = i915_vm_open(vm); context_apply_all(ctx, __apply_ppgtt, vm); return old; @@ -488,7 +490,7 @@ static void __assign_ppgtt(struct i915_gem_context *ctx, vm = __set_ppgtt(ctx, vm); if (vm) - i915_vm_put(vm); + i915_vm_close(vm); } static void __set_timeline(struct intel_timeline **dst, @@ -953,7 +955,7 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv, if (ret < 0) goto err_unlock; - i915_vm_get(vm); + i915_vm_open(vm); args->size = 0; args->value = ret; @@ -973,7 +975,7 @@ static void set_ppgtt_barrier(void *data) if (INTEL_GEN(old->i915) < 8) gen6_ppgtt_unpin_all(i915_vm_to_ppgtt(old)); - i915_vm_put(old); + i915_vm_close(old); } static int emit_ppgtt_update(struct i915_request *rq, void *data) @@ -1090,8 +1092,8 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv, set_ppgtt_barrier, old); if (err) { - i915_vm_put(__set_ppgtt(ctx, old)); - i915_vm_put(old); + i915_vm_close(__set_ppgtt(ctx, old)); + i915_vm_close(old); } unlock: diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 55c3ab59e3aa4..9937b4c341f1a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -288,7 +288,12 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, if (!drm_mm_node_allocated(&vma->node)) continue; - ret = i915_vma_bind(vma, cache_level, PIN_UPDATE); + /* Wait for an earlier async bind, need to rewrite it */ + ret = i915_vma_sync(vma); + if (ret) + return ret; + + ret = i915_vma_bind(vma, cache_level, PIN_UPDATE, NULL); if (ret) return ret; } @@ -391,16 +396,11 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, if (ret) goto out; - ret = mutex_lock_interruptible(&i915->drm.struct_mutex); - if (ret) - goto out; - ret = i915_gem_object_lock_interruptible(obj); if (ret == 0) { ret = i915_gem_object_set_cache_level(obj, level); i915_gem_object_unlock(obj); } - mutex_unlock(&i915->drm.struct_mutex); out: i915_gem_object_put(obj); @@ -485,6 +485,7 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) if (!drm_mm_node_allocated(&vma->node)) continue; + GEM_BUG_ON(vma->vm != &i915->ggtt.vm); list_move_tail(&vma->vm_link, &vma->vm->bound_list); } mutex_unlock(&i915->ggtt.vm.mutex); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 228ce24ea2806..42c15e8bf1666 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -698,7 +698,9 @@ static int eb_reserve(struct i915_execbuffer *eb) case 1: /* Too fragmented, unbind everything and retry */ + mutex_lock(&eb->context->vm->mutex); err = i915_gem_evict_vm(eb->context->vm); + mutex_unlock(&eb->context->vm->mutex); if (err) return err; break; @@ -972,7 +974,9 @@ static void reloc_cache_reset(struct reloc_cache *cache) ggtt->vm.clear_range(&ggtt->vm, cache->node.start, cache->node.size); + mutex_lock(&ggtt->vm.mutex); drm_mm_remove_node(&cache->node); + mutex_unlock(&ggtt->vm.mutex); } else { i915_vma_unpin((struct i915_vma *)cache->node.mm); } @@ -1047,11 +1051,13 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, PIN_NOEVICT); if (IS_ERR(vma)) { memset(&cache->node, 0, sizeof(cache->node)); + mutex_lock(&ggtt->vm.mutex); err = drm_mm_insert_node_in_range (&ggtt->vm.mm, &cache->node, PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, 0, ggtt->mappable_end, DRM_MM_INSERT_LOW); + mutex_unlock(&ggtt->vm.mutex); if (err) /* no inactive aperture space, use cpu reloc */ return NULL; } else { @@ -1416,7 +1422,7 @@ eb_relocate_entry(struct i915_execbuffer *eb, if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION && IS_GEN(eb->i915, 6)) { err = i915_vma_bind(target, target->obj->cache_level, - PIN_GLOBAL); + PIN_GLOBAL, NULL); if (WARN_ONCE(err, "Unexpected failure to bind target VMA!")) return err; @@ -2142,35 +2148,6 @@ static struct i915_request *eb_throttle(struct intel_context *ce) return i915_request_get(rq); } -static int -__eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce) -{ - int err; - - if (likely(atomic_inc_not_zero(&ce->pin_count))) - return 0; - - err = mutex_lock_interruptible(&eb->i915->drm.struct_mutex); - if (err) - return err; - - err = __intel_context_do_pin(ce); - mutex_unlock(&eb->i915->drm.struct_mutex); - - return err; -} - -static void -__eb_unpin_context(struct i915_execbuffer *eb, struct intel_context *ce) -{ - if (likely(atomic_add_unless(&ce->pin_count, -1, 1))) - return; - - mutex_lock(&eb->i915->drm.struct_mutex); - intel_context_unpin(ce); - mutex_unlock(&eb->i915->drm.struct_mutex); -} - static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce) { struct intel_timeline *tl; @@ -2190,7 +2167,7 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce) * GGTT space, so do this first before we reserve a seqno for * ourselves. */ - err = __eb_pin_context(eb, ce); + err = intel_context_pin(ce); if (err) return err; @@ -2234,7 +2211,7 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce) intel_context_exit(ce); intel_context_timeline_unlock(tl); err_unpin: - __eb_unpin_context(eb, ce); + intel_context_unpin(ce); return err; } @@ -2247,7 +2224,7 @@ static void eb_unpin_engine(struct i915_execbuffer *eb) intel_context_exit(ce); mutex_unlock(&tl->mutex); - __eb_unpin_context(eb, ce); + intel_context_unpin(ce); } static unsigned int diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index dd0c2840ba4d5..c19431d609fca 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -249,16 +249,6 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) if (ret) goto err_rpm; - ret = i915_mutex_lock_interruptible(dev); - if (ret) - goto err_reset; - - /* Access to snoopable pages through the GTT is incoherent. */ - if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) { - ret = -EFAULT; - goto err_unlock; - } - /* Now pin it into the GTT as needed */ vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE | @@ -291,7 +281,13 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) } if (IS_ERR(vma)) { ret = PTR_ERR(vma); - goto err_unlock; + goto err_reset; + } + + /* Access to snoopable pages through the GTT is incoherent. */ + if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) { + ret = -EFAULT; + goto err_unpin; } ret = i915_vma_pin_fence(vma); @@ -329,8 +325,6 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) i915_vma_unpin_fence(vma); err_unpin: __i915_vma_unpin(vma); -err_unlock: - mutex_unlock(&dev->struct_mutex); err_reset: intel_gt_reset_unlock(ggtt->vm.gt, srcu); err_rpm: diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 0ef60dae23a7e..dbf9be9a79f47 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -155,21 +155,30 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, wakeref = intel_runtime_pm_get(&i915->runtime_pm); llist_for_each_entry_safe(obj, on, freed, freed) { - struct i915_vma *vma, *vn; - trace_i915_gem_object_destroy(obj); - mutex_lock(&i915->drm.struct_mutex); - - list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) { - GEM_BUG_ON(i915_vma_is_active(vma)); - atomic_and(~I915_VMA_PIN_MASK, &vma->flags); - i915_vma_destroy(vma); + if (!list_empty(&obj->vma.list)) { + struct i915_vma *vma; + + /* + * Note that the vma keeps an object reference while + * it is active, so it *should* not sleep while we + * destroy it. Our debug code errs insits it *might*. + * For the moment, play along. + */ + spin_lock(&obj->vma.lock); + while ((vma = list_first_entry_or_null(&obj->vma.list, + struct i915_vma, + obj_link))) { + GEM_BUG_ON(vma->obj != obj); + spin_unlock(&obj->vma.lock); + + i915_vma_destroy(vma); + + spin_lock(&obj->vma.lock); + } + spin_unlock(&obj->vma.lock); } - GEM_BUG_ON(!list_empty(&obj->vma.list)); - GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree)); - - mutex_unlock(&i915->drm.struct_mutex); GEM_BUG_ON(atomic_read(&obj->bind_count)); GEM_BUG_ON(obj->userfault_count); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 53c7069ba3e89..086a9bf5adcc3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -106,6 +106,11 @@ static inline void i915_gem_object_lock(struct drm_i915_gem_object *obj) dma_resv_lock(obj->base.resv, NULL); } +static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj) +{ + return dma_resv_trylock(obj->base.resv); +} + static inline int i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index d2c05d7529098..fd3ce6da8497d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -16,40 +16,6 @@ #include "i915_trace.h" -static bool shrinker_lock(struct drm_i915_private *i915, - unsigned int flags, - bool *unlock) -{ - struct mutex *m = &i915->drm.struct_mutex; - - switch (mutex_trylock_recursive(m)) { - case MUTEX_TRYLOCK_RECURSIVE: - *unlock = false; - return true; - - case MUTEX_TRYLOCK_FAILED: - *unlock = false; - if (flags & I915_SHRINK_ACTIVE && - mutex_lock_killable_nested(m, I915_MM_SHRINKER) == 0) - *unlock = true; - return *unlock; - - case MUTEX_TRYLOCK_SUCCESS: - *unlock = true; - return true; - } - - BUG(); -} - -static void shrinker_unlock(struct drm_i915_private *i915, bool unlock) -{ - if (!unlock) - return; - - mutex_unlock(&i915->drm.struct_mutex); -} - static bool swap_available(void) { return get_nr_swap_pages() > 0; @@ -155,10 +121,6 @@ i915_gem_shrink(struct drm_i915_private *i915, intel_wakeref_t wakeref = 0; unsigned long count = 0; unsigned long scanned = 0; - bool unlock; - - if (!shrinker_lock(i915, shrink, &unlock)) - return 0; /* * When shrinking the active list, we should also consider active @@ -268,8 +230,6 @@ i915_gem_shrink(struct drm_i915_private *i915, if (shrink & I915_SHRINK_BOUND) intel_runtime_pm_put(&i915->runtime_pm, wakeref); - shrinker_unlock(i915, unlock); - if (nr_scanned) *nr_scanned += scanned; return count; @@ -339,19 +299,14 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) struct drm_i915_private *i915 = container_of(shrinker, struct drm_i915_private, mm.shrinker); unsigned long freed; - bool unlock; sc->nr_scanned = 0; - if (!shrinker_lock(i915, 0, &unlock)) - return SHRINK_STOP; - freed = i915_gem_shrink(i915, sc->nr_to_scan, &sc->nr_scanned, I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND | - I915_SHRINK_WRITEBACK); + I915_SHRINK_UNBOUND); if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) { intel_wakeref_t wakeref; @@ -366,8 +321,6 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) } } - shrinker_unlock(i915, unlock); - return sc->nr_scanned ? freed : SHRINK_STOP; } @@ -384,6 +337,7 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) freed_pages = 0; with_intel_runtime_pm(&i915->runtime_pm, wakeref) freed_pages += i915_gem_shrink(i915, -1UL, NULL, + I915_SHRINK_ACTIVE | I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_WRITEBACK); @@ -419,10 +373,6 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr struct i915_vma *vma, *next; unsigned long freed_pages = 0; intel_wakeref_t wakeref; - bool unlock; - - if (!shrinker_lock(i915, 0, &unlock)) - return NOTIFY_DONE; with_intel_runtime_pm(&i915->runtime_pm, wakeref) freed_pages += i915_gem_shrink(i915, -1UL, NULL, @@ -439,15 +389,11 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr if (!vma->iomap || i915_vma_is_active(vma)) continue; - mutex_unlock(&i915->ggtt.vm.mutex); - if (i915_vma_unbind(vma) == 0) + if (__i915_vma_unbind(vma) == 0) freed_pages += count; - mutex_lock(&i915->ggtt.vm.mutex); } mutex_unlock(&i915->ggtt.vm.mutex); - shrinker_unlock(i915, unlock); - *(unsigned long *)ptr += freed_pages; return NOTIFY_DONE; } @@ -490,22 +436,9 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, fs_reclaim_acquire(GFP_KERNEL); - /* - * As we invariably rely on the struct_mutex within the shrinker, - * but have a complicated recursion dance, taint all the mutexes used - * within the shrinker with the struct_mutex. For completeness, we - * taint with all subclass of struct_mutex, even though we should - * only need tainting by I915_MM_NORMAL to catch possible ABBA - * deadlocks from using struct_mutex inside @mutex. - */ - mutex_acquire(&i915->drm.struct_mutex.dep_map, - I915_MM_SHRINKER, 0, _RET_IP_); - mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_); mutex_release(&mutex->dep_map, 0, _RET_IP_); - mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_); - fs_reclaim_release(GFP_KERNEL); if (unlock) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index e45eb87218506..fad98a921cde8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -624,8 +624,6 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv if (!drm_mm_initialized(&dev_priv->mm.stolen)) return NULL; - lockdep_assert_held(&dev_priv->drm.struct_mutex); - DRM_DEBUG_DRIVER("creating preallocated stolen object: stolen_offset=%pa, gtt_offset=%pa, size=%pa\n", &stolen_offset, >t_offset, &size); @@ -677,21 +675,25 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv * setting up the GTT space. The actual reservation will occur * later. */ + mutex_lock(&ggtt->vm.mutex); ret = i915_gem_gtt_reserve(&ggtt->vm, &vma->node, size, gtt_offset, obj->cache_level, 0); if (ret) { DRM_DEBUG_DRIVER("failed to allocate stolen GTT space\n"); + mutex_unlock(&ggtt->vm.mutex); goto err_pages; } GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + GEM_BUG_ON(vma->pages); vma->pages = obj->mm.pages; + atomic_set(&vma->pages_count, I915_VMA_PAGES_ACTIVE); + set_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma)); __i915_vma_set_map_and_fenceable(vma); - mutex_lock(&ggtt->vm.mutex); list_add_tail(&vma->vm_link, &ggtt->vm.bound_list); mutex_unlock(&ggtt->vm.mutex); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c index e5d1ae8d4dbad..dc2a83ce44d52 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c @@ -181,22 +181,25 @@ static int i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj, int tiling_mode, unsigned int stride) { + struct i915_ggtt *ggtt = &to_i915(obj->base.dev)->ggtt; struct i915_vma *vma; - int ret; + int ret = 0; if (tiling_mode == I915_TILING_NONE) return 0; + mutex_lock(&ggtt->vm.mutex); for_each_ggtt_vma(vma, obj) { if (i915_vma_fence_prepare(vma, tiling_mode, stride)) continue; - ret = i915_vma_unbind(vma); + ret = __i915_vma_unbind(vma); if (ret) - return ret; + break; } + mutex_unlock(&ggtt->vm.mutex); - return 0; + return ret; } int @@ -212,7 +215,6 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, GEM_BUG_ON(!i915_tiling_ok(obj, tiling, stride)); GEM_BUG_ON(!stride ^ (tiling == I915_TILING_NONE)); - lockdep_assert_held(&i915->drm.struct_mutex); if ((tiling | stride) == obj->tiling_and_stride) return 0; @@ -233,16 +235,18 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, * whilst executing a fenced command for an untiled object. */ - err = i915_gem_object_fence_prepare(obj, tiling, stride); - if (err) - return err; - i915_gem_object_lock(obj); if (i915_gem_object_is_framebuffer(obj)) { i915_gem_object_unlock(obj); return -EBUSY; } + err = i915_gem_object_fence_prepare(obj, tiling, stride); + if (err) { + i915_gem_object_unlock(obj); + return err; + } + /* If the memory has unknown (i.e. varying) swizzling, we pin the * pages to prevent them being swapped out and causing corruption * due to the change in swizzling. @@ -368,12 +372,7 @@ i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data, } } - err = mutex_lock_interruptible(&dev->struct_mutex); - if (err) - goto err; - err = i915_gem_object_set_tiling(obj, args->tiling_mode, args->stride); - mutex_unlock(&dev->struct_mutex); /* We have to maintain this existing ABI... */ args->stride = i915_gem_object_get_stride(obj); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index a0d974ac78a73..3798502097b41 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -92,7 +92,6 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, struct i915_mmu_notifier *mn = container_of(_mn, struct i915_mmu_notifier, mn); struct interval_tree_node *it; - struct mutex *unlock = NULL; unsigned long end; int ret = 0; @@ -129,33 +128,13 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, } spin_unlock(&mn->lock); - if (!unlock) { - unlock = &mn->mm->i915->drm.struct_mutex; - - switch (mutex_trylock_recursive(unlock)) { - default: - case MUTEX_TRYLOCK_FAILED: - if (mutex_lock_killable_nested(unlock, I915_MM_SHRINKER)) { - i915_gem_object_put(obj); - return -EINTR; - } - /* fall through */ - case MUTEX_TRYLOCK_SUCCESS: - break; - - case MUTEX_TRYLOCK_RECURSIVE: - unlock = ERR_PTR(-EEXIST); - break; - } - } - ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE); if (ret == 0) ret = __i915_gem_object_put_pages(obj, I915_MM_SHRINKER); i915_gem_object_put(obj); if (ret) - goto unlock; + return ret; spin_lock(&mn->lock); @@ -168,10 +147,6 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, } spin_unlock(&mn->lock); -unlock: - if (!IS_ERR_OR_NULL(unlock)) - mutex_unlock(unlock); - return ret; } diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index c5cea43792163..98b2a6ccfcc1a 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -333,7 +333,12 @@ static int igt_check_page_sizes(struct i915_vma *vma) struct drm_i915_private *i915 = vma->vm->i915; unsigned int supported = INTEL_INFO(i915)->page_sizes; struct drm_i915_gem_object *obj = vma->obj; - int err = 0; + int err; + + /* We have to wait for the async bind to complete before our asserts */ + err = i915_vma_sync(vma); + if (err) + return err; if (!HAS_PAGE_SIZES(i915, vma->page_sizes.sg)) { pr_err("unsupported page_sizes.sg=%u, supported=%u\n", @@ -1390,7 +1395,7 @@ static int igt_ppgtt_pin_update(void *arg) goto out_unpin; } - err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE); + err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE, NULL); if (err) goto out_unpin; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 0f4d0644a4800..8eba0d3a31def 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -971,10 +971,7 @@ emit_rpcs_query(struct drm_i915_gem_object *obj, if (err) goto skip_request; - i915_vma_unpin(batch); - i915_vma_close(batch); - i915_vma_put(batch); - + i915_vma_unpin_and_release(&batch, 0); i915_vma_unpin(vma); *rq_out = i915_request_get(rq); @@ -988,8 +985,7 @@ emit_rpcs_query(struct drm_i915_gem_object *obj, err_request: i915_request_add(rq); err_batch: - i915_vma_unpin(batch); - i915_vma_put(batch); + i915_vma_unpin_and_release(&batch, 0); err_vma: i915_vma_unpin(vma); @@ -1533,9 +1529,7 @@ static int write_to_scratch(struct i915_gem_context *ctx, if (err) goto skip_request; - i915_vma_unpin(vma); - i915_vma_close(vma); - i915_vma_put(vma); + i915_vma_unpin_and_release(&vma, 0); i915_request_add(rq); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index aefe557527f81..36aca1c172e77 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -322,7 +322,6 @@ static int igt_partial_tiling(void *arg) goto out; } - mutex_lock(&i915->drm.struct_mutex); wakeref = intel_runtime_pm_get(&i915->runtime_pm); if (1) { @@ -415,7 +414,6 @@ next_tiling: ; out_unlock: intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); i915_gem_object_unpin_pages(obj); out: i915_gem_object_put(obj); @@ -458,7 +456,6 @@ static int igt_smoke_tiling(void *arg) goto out; } - mutex_lock(&i915->drm.struct_mutex); wakeref = intel_runtime_pm_get(&i915->runtime_pm); count = 0; @@ -508,7 +505,6 @@ static int igt_smoke_tiling(void *arg) pr_info("%s: Completed %lu trials\n", __func__, count); intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); i915_gem_object_unpin_pages(obj); out: i915_gem_object_put(obj); diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c index ee5dc13a30b33..6718da20f35d2 100644 --- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c +++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c @@ -154,9 +154,7 @@ int igt_gpu_fill_dw(struct intel_context *ce, i915_request_add(rq); - i915_vma_unpin(batch); - i915_vma_close(batch); - i915_vma_put(batch); + i915_vma_unpin_and_release(&batch, 0); return 0; @@ -165,7 +163,6 @@ int igt_gpu_fill_dw(struct intel_context *ce, err_request: i915_request_add(rq); err_batch: - i915_vma_unpin(batch); - i915_vma_put(batch); + i915_vma_unpin_and_release(&batch, 0); return err; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 0e5909ee06579..7205595369be2 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -302,11 +302,12 @@ void intel_gt_flush_ggtt_writes(struct intel_gt *gt) with_intel_runtime_pm(&i915->runtime_pm, wakeref) { struct intel_uncore *uncore = gt->uncore; + unsigned long flags; - spin_lock_irq(&uncore->lock); + spin_lock_irqsave(&uncore->lock, flags); intel_uncore_posting_read_fw(uncore, RING_HEAD(RENDER_RING_BASE)); - spin_unlock_irq(&uncore->lock); + spin_unlock_irqrestore(&uncore->lock, flags); } } diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index 0747b8c9f7683..ec32996254c09 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -1338,15 +1338,13 @@ void intel_ring_free(struct kref *ref) { struct intel_ring *ring = container_of(ref, typeof(*ring), ref); - i915_vma_close(ring->vma); i915_vma_put(ring->vma); - kfree(ring); } static void __ring_context_fini(struct intel_context *ce) { - i915_gem_object_put(ce->state->obj); + i915_vma_put(ce->state); } static void ring_context_destroy(struct kref *ref) diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 9c0c8441c22a5..d3bee9f880088 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -1127,15 +1127,14 @@ static int evict_vma(void *data) { struct evict_vma *arg = data; struct i915_address_space *vm = arg->vma->vm; - struct drm_i915_private *i915 = vm->i915; struct drm_mm_node evict = arg->vma->node; int err; complete(&arg->completion); - mutex_lock(&i915->drm.struct_mutex); + mutex_lock(&vm->mutex); err = i915_gem_evict_for_node(vm, &evict, 0); - mutex_unlock(&i915->drm.struct_mutex); + mutex_unlock(&vm->mutex); return err; } @@ -1143,39 +1142,33 @@ static int evict_vma(void *data) static int evict_fence(void *data) { struct evict_vma *arg = data; - struct drm_i915_private *i915 = arg->vma->vm->i915; int err; complete(&arg->completion); - mutex_lock(&i915->drm.struct_mutex); - /* Mark the fence register as dirty to force the mmio update. */ err = i915_gem_object_set_tiling(arg->vma->obj, I915_TILING_Y, 512); if (err) { pr_err("Invalid Y-tiling settings; err:%d\n", err); - goto out_unlock; + return err; } err = i915_vma_pin(arg->vma, 0, 0, PIN_GLOBAL | PIN_MAPPABLE); if (err) { pr_err("Unable to pin vma for Y-tiled fence; err:%d\n", err); - goto out_unlock; + return err; } err = i915_vma_pin_fence(arg->vma); i915_vma_unpin(arg->vma); if (err) { pr_err("Unable to pin Y-tiled fence; err:%d\n", err); - goto out_unlock; + return err; } i915_vma_unpin_fence(arg->vma); -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - - return err; + return 0; } static int __igt_reset_evict_vma(struct intel_gt *gt, diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c index 5ff2437b2998a..d996bbc7ea59a 100644 --- a/drivers/gpu/drm/i915/gvt/aperture_gm.c +++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c @@ -61,14 +61,14 @@ static int alloc_gm(struct intel_vgpu *vgpu, bool high_gm) flags = PIN_MAPPABLE; } - mutex_lock(&dev_priv->drm.struct_mutex); + mutex_lock(&dev_priv->ggtt.vm.mutex); mmio_hw_access_pre(dev_priv); ret = i915_gem_gtt_insert(&dev_priv->ggtt.vm, node, size, I915_GTT_PAGE_SIZE, I915_COLOR_UNEVICTABLE, start, end, flags); mmio_hw_access_post(dev_priv); - mutex_unlock(&dev_priv->drm.struct_mutex); + mutex_unlock(&dev_priv->ggtt.vm.mutex); if (ret) gvt_err("fail to alloc %s gm space from host\n", high_gm ? "high" : "low"); @@ -98,9 +98,9 @@ static int alloc_vgpu_gm(struct intel_vgpu *vgpu) return 0; out_free_aperture: - mutex_lock(&dev_priv->drm.struct_mutex); + mutex_lock(&dev_priv->ggtt.vm.mutex); drm_mm_remove_node(&vgpu->gm.low_gm_node); - mutex_unlock(&dev_priv->drm.struct_mutex); + mutex_unlock(&dev_priv->ggtt.vm.mutex); return ret; } @@ -108,10 +108,10 @@ static void free_vgpu_gm(struct intel_vgpu *vgpu) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - mutex_lock(&dev_priv->drm.struct_mutex); + mutex_lock(&dev_priv->ggtt.vm.mutex); drm_mm_remove_node(&vgpu->gm.low_gm_node); drm_mm_remove_node(&vgpu->gm.high_gm_node); - mutex_unlock(&dev_priv->drm.struct_mutex); + mutex_unlock(&dev_priv->ggtt.vm.mutex); } /** diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index d5aac6ff803a3..0791736a08fd2 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -146,6 +146,7 @@ __active_retire(struct i915_active *ref) if (!retire) return; + GEM_BUG_ON(rcu_access_pointer(ref->excl)); rbtree_postorder_for_each_entry_safe(it, n, &root, node) { GEM_BUG_ON(i915_active_request_isset(&it->base)); kmem_cache_free(global.slab_cache, it); @@ -245,6 +246,8 @@ void __i915_active_init(struct drm_i915_private *i915, ref->flags = 0; ref->active = active; ref->retire = retire; + + ref->excl = NULL; ref->tree = RB_ROOT; ref->cache = NULL; init_llist_head(&ref->preallocated_barriers); @@ -341,6 +344,46 @@ int i915_active_ref(struct i915_active *ref, return err; } +static void excl_cb(struct dma_fence *f, struct dma_fence_cb *cb) +{ + struct i915_active *ref = container_of(cb, typeof(*ref), excl_cb); + + RCU_INIT_POINTER(ref->excl, NULL); + dma_fence_put(f); + + active_retire(ref); +} + +void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f) +{ + /* We expect the caller to manage the exclusive timeline ordering */ + GEM_BUG_ON(i915_active_is_idle(ref)); + + dma_fence_get(f); + + rcu_read_lock(); + if (rcu_access_pointer(ref->excl)) { + struct dma_fence *old; + + old = dma_fence_get_rcu_safe(&ref->excl); + if (old) { + if (dma_fence_remove_callback(old, &ref->excl_cb)) + atomic_dec(&ref->count); + dma_fence_put(old); + } + } + rcu_read_unlock(); + + atomic_inc(&ref->count); + rcu_assign_pointer(ref->excl, f); + + if (dma_fence_add_callback(f, &ref->excl_cb, excl_cb)) { + RCU_INIT_POINTER(ref->excl, NULL); + atomic_dec(&ref->count); + dma_fence_put(f); + } +} + int i915_active_acquire(struct i915_active *ref) { int err; @@ -399,6 +442,25 @@ void i915_active_ungrab(struct i915_active *ref) __active_ungrab(ref); } +static int excl_wait(struct i915_active *ref) +{ + struct dma_fence *old; + int err = 0; + + if (!rcu_access_pointer(ref->excl)) + return 0; + + rcu_read_lock(); + old = dma_fence_get_rcu_safe(&ref->excl); + rcu_read_unlock(); + if (old) { + err = dma_fence_wait(old, true); + dma_fence_put(old); + } + + return err; +} + int i915_active_wait(struct i915_active *ref) { struct active_node *it, *n; @@ -419,6 +481,10 @@ int i915_active_wait(struct i915_active *ref) return 0; } + err = excl_wait(ref); + if (err) + goto out; + rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { if (is_barrier(&it->base)) { /* unconnected idle-barrier */ err = -EBUSY; @@ -430,6 +496,7 @@ int i915_active_wait(struct i915_active *ref) break; } +out: __active_retire(ref); if (err) return err; @@ -454,26 +521,22 @@ int i915_request_await_active_request(struct i915_request *rq, int i915_request_await_active(struct i915_request *rq, struct i915_active *ref) { - struct active_node *it, *n; - int err; - - if (RB_EMPTY_ROOT(&ref->tree)) - return 0; + int err = 0; - /* await allocates and so we need to avoid hitting the shrinker */ - err = i915_active_acquire(ref); - if (err) - return err; + if (rcu_access_pointer(ref->excl)) { + struct dma_fence *fence; - mutex_lock(&ref->mutex); - rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { - err = i915_request_await_active_request(rq, &it->base); - if (err) - break; + rcu_read_lock(); + fence = dma_fence_get_rcu_safe(&ref->excl); + rcu_read_unlock(); + if (fence) { + err = i915_request_await_dma_fence(rq, fence); + dma_fence_put(fence); + } } - mutex_unlock(&ref->mutex); - i915_active_release(ref); + /* In the future we may choose to await on all fences */ + return err; } diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h index 949c6835335bc..90034f61b7c25 100644 --- a/drivers/gpu/drm/i915/i915_active.h +++ b/drivers/gpu/drm/i915/i915_active.h @@ -379,6 +379,13 @@ i915_active_add_request(struct i915_active *ref, struct i915_request *rq) return i915_active_ref(ref, i915_request_timeline(rq), rq); } +void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f); + +static inline bool i915_active_has_exclusive(struct i915_active *ref) +{ + return rcu_access_pointer(ref->excl); +} + int i915_active_wait(struct i915_active *ref); int i915_request_await_active(struct i915_request *rq, diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h index 1854e7d168c1e..86e7a232ea3c9 100644 --- a/drivers/gpu/drm/i915/i915_active_types.h +++ b/drivers/gpu/drm/i915/i915_active_types.h @@ -8,6 +8,7 @@ #define _I915_ACTIVE_TYPES_H_ #include +#include #include #include #include @@ -51,6 +52,10 @@ struct i915_active { struct mutex mutex; atomic_t count; + /* Preallocated "exclusive" node */ + struct dma_fence __rcu *excl; + struct dma_fence_cb excl_cb; + unsigned long flags; #define I915_ACTIVE_GRAB_BIT 0 diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 1d2246c59cdea..b711324ca2948 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1895,10 +1895,8 @@ static int i915_drm_resume(struct drm_device *dev) if (ret) DRM_ERROR("failed to re-enable GGTT\n"); - mutex_lock(&dev_priv->drm.struct_mutex); i915_gem_restore_gtt_mappings(dev_priv); i915_gem_restore_fences(dev_priv); - mutex_unlock(&dev_priv->drm.struct_mutex); intel_csr_ucode_resume(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index fa8e028ac0b56..fc5c618f6c197 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -62,20 +62,31 @@ #include "intel_pm.h" static int -insert_mappable_node(struct i915_ggtt *ggtt, - struct drm_mm_node *node, u32 size) +insert_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node, u32 size) { + int err; + + err = mutex_lock_interruptible(&ggtt->vm.mutex); + if (err) + return err; + memset(node, 0, sizeof(*node)); - return drm_mm_insert_node_in_range(&ggtt->vm.mm, node, - size, 0, I915_COLOR_UNEVICTABLE, - 0, ggtt->mappable_end, - DRM_MM_INSERT_LOW); + err = drm_mm_insert_node_in_range(&ggtt->vm.mm, node, + size, 0, I915_COLOR_UNEVICTABLE, + 0, ggtt->mappable_end, + DRM_MM_INSERT_LOW); + + mutex_unlock(&ggtt->vm.mutex); + + return err; } static void -remove_mappable_node(struct drm_mm_node *node) +remove_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node) { + mutex_lock(&ggtt->vm.mutex); drm_mm_remove_node(node); + mutex_unlock(&ggtt->vm.mutex); } int @@ -87,7 +98,8 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, struct i915_vma *vma; u64 pinned; - mutex_lock(&ggtt->vm.mutex); + if (mutex_lock_interruptible(&ggtt->vm.mutex)) + return -EINTR; pinned = ggtt->vm.reserved; list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) @@ -109,20 +121,24 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj, LIST_HEAD(still_in_list); int ret = 0; - lockdep_assert_held(&obj->base.dev->struct_mutex); - spin_lock(&obj->vma.lock); while (!ret && (vma = list_first_entry_or_null(&obj->vma.list, struct i915_vma, obj_link))) { + struct i915_address_space *vm = vma->vm; + + ret = -EBUSY; + if (!i915_vm_tryopen(vm)) + break; + list_move_tail(&vma->obj_link, &still_in_list); spin_unlock(&obj->vma.lock); - ret = -EBUSY; if (flags & I915_GEM_OBJECT_UNBIND_ACTIVE || !i915_vma_is_active(vma)) ret = i915_vma_unbind(vma); + i915_vm_close(vm); spin_lock(&obj->vma.lock); } list_splice(&still_in_list, &obj->vma.list); @@ -338,10 +354,6 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, u64 remain, offset; int ret; - ret = mutex_lock_interruptible(&i915->drm.struct_mutex); - if (ret) - return ret; - wakeref = intel_runtime_pm_get(&i915->runtime_pm); vma = ERR_PTR(-ENODEV); if (!i915_gem_object_is_tiled(obj)) @@ -355,12 +367,10 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, } else { ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); if (ret) - goto out_unlock; + goto out_rpm; GEM_BUG_ON(!drm_mm_node_allocated(&node)); } - mutex_unlock(&i915->drm.struct_mutex); - ret = i915_gem_object_lock_interruptible(obj); if (ret) goto out_unpin; @@ -414,17 +424,14 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, i915_gem_object_unlock_fence(obj, fence); out_unpin: - mutex_lock(&i915->drm.struct_mutex); if (drm_mm_node_allocated(&node)) { ggtt->vm.clear_range(&ggtt->vm, node.start, node.size); - remove_mappable_node(&node); + remove_mappable_node(ggtt, &node); } else { i915_vma_unpin(vma); } -out_unlock: +out_rpm: intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - return ret; } @@ -531,10 +538,6 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, void __user *user_data; int ret; - ret = mutex_lock_interruptible(&i915->drm.struct_mutex); - if (ret) - return ret; - if (i915_gem_object_has_struct_page(obj)) { /* * Avoid waking the device up if we can fallback, as @@ -544,10 +547,8 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, * using the cache bypass of indirect GGTT access. */ wakeref = intel_runtime_pm_get_if_in_use(rpm); - if (!wakeref) { - ret = -EFAULT; - goto out_unlock; - } + if (!wakeref) + return -EFAULT; } else { /* No backing pages, no fallback, we must force GGTT access */ wakeref = intel_runtime_pm_get(rpm); @@ -569,8 +570,6 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, GEM_BUG_ON(!drm_mm_node_allocated(&node)); } - mutex_unlock(&i915->drm.struct_mutex); - ret = i915_gem_object_lock_interruptible(obj); if (ret) goto out_unpin; @@ -634,18 +633,15 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, i915_gem_object_unlock_fence(obj, fence); out_unpin: - mutex_lock(&i915->drm.struct_mutex); intel_gt_flush_ggtt_writes(ggtt->vm.gt); if (drm_mm_node_allocated(&node)) { ggtt->vm.clear_range(&ggtt->vm, node.start, node.size); - remove_mappable_node(&node); + remove_mappable_node(ggtt, &node); } else { i915_vma_unpin(vma); } out_rpm: intel_runtime_pm_put(rpm, wakeref); -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); return ret; } @@ -968,8 +964,6 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, struct i915_vma *vma; int ret; - lockdep_assert_held(&obj->base.dev->struct_mutex); - if (i915_gem_object_never_bind_ggtt(obj)) return ERR_PTR(-ENODEV); @@ -1019,13 +1013,6 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, return ERR_PTR(-ENOSPC); } - WARN(i915_vma_is_pinned(vma), - "bo is already pinned in ggtt with incorrect alignment:" - " offset=%08x, req.alignment=%llx," - " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n", - i915_ggtt_offset(vma), alignment, - !!(flags & PIN_MAPPABLE), - i915_vma_is_map_and_fenceable(vma)); ret = i915_vma_unbind(vma); if (ret) return ERR_PTR(ret); @@ -1444,8 +1431,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv) } if (ret == -EIO) { - mutex_lock(&dev_priv->drm.struct_mutex); - /* * Allow engines or uC initialisation to fail by marking the GPU * as wedged. But we only want to do this when the GPU is angry, @@ -1462,8 +1447,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv) i915_gem_restore_gtt_mappings(dev_priv); i915_gem_restore_fences(dev_priv); intel_init_clock_gating(dev_priv); - - mutex_unlock(&dev_priv->drm.struct_mutex); } i915_gem_drain_freed_objects(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 8c1e04f402bcc..0552bf93eea3c 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -47,8 +47,7 @@ static int ggtt_flush(struct drm_i915_private *i915) * bound by their active reference. */ return i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED, + I915_WAIT_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); } @@ -104,7 +103,7 @@ i915_gem_evict_something(struct i915_address_space *vm, struct i915_vma *active; int ret; - lockdep_assert_held(&vm->i915->drm.struct_mutex); + lockdep_assert_held(&vm->mutex); trace_i915_gem_evict(vm, min_size, alignment, flags); /* @@ -127,15 +126,6 @@ i915_gem_evict_something(struct i915_address_space *vm, min_size, alignment, color, start, end, mode); - /* - * Retire before we search the active list. Although we have - * reasonable accuracy in our retirement lists, we may have - * a stray pin (preventing eviction) that can only be resolved by - * retiring. - */ - if (!(flags & PIN_NONBLOCK)) - i915_retire_requests(dev_priv); - search_again: active = NULL; INIT_LIST_HEAD(&eviction_list); @@ -235,12 +225,12 @@ i915_gem_evict_something(struct i915_address_space *vm, list_for_each_entry_safe(vma, next, &eviction_list, evict_link) { __i915_vma_unpin(vma); if (ret == 0) - ret = i915_vma_unbind(vma); + ret = __i915_vma_unbind(vma); } while (ret == 0 && (node = drm_mm_scan_color_evict(&scan))) { vma = container_of(node, struct i915_vma, node); - ret = i915_vma_unbind(vma); + ret = __i915_vma_unbind(vma); } return ret; @@ -268,7 +258,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, struct i915_vma *vma, *next; int ret = 0; - lockdep_assert_held(&vm->i915->drm.struct_mutex); + lockdep_assert_held(&vm->mutex); GEM_BUG_ON(!IS_ALIGNED(start, I915_GTT_PAGE_SIZE)); GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE)); @@ -349,7 +339,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, list_for_each_entry_safe(vma, next, &eviction_list, evict_link) { __i915_vma_unpin(vma); if (ret == 0) - ret = i915_vma_unbind(vma); + ret = __i915_vma_unbind(vma); } return ret; @@ -373,7 +363,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm) struct i915_vma *vma, *next; int ret; - lockdep_assert_held(&vm->i915->drm.struct_mutex); + lockdep_assert_held(&vm->mutex); trace_i915_gem_evict_vm(vm); /* Switch back to the default context in order to unpin @@ -388,7 +378,6 @@ int i915_gem_evict_vm(struct i915_address_space *vm) } INIT_LIST_HEAD(&eviction_list); - mutex_lock(&vm->mutex); list_for_each_entry(vma, &vm->bound_list, vm_link) { if (i915_vma_is_pinned(vma)) continue; @@ -396,13 +385,12 @@ int i915_gem_evict_vm(struct i915_address_space *vm) __i915_vma_pin(vma); list_add(&vma->evict_link, &eviction_list); } - mutex_unlock(&vm->mutex); ret = 0; list_for_each_entry_safe(vma, next, &eviction_list, evict_link) { __i915_vma_unpin(vma); if (ret == 0) - ret = i915_vma_unbind(vma); + ret = __i915_vma_unbind(vma); } return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c index 615a9f4ef30c2..487b7261f7ed1 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c @@ -230,14 +230,15 @@ static int fence_update(struct i915_fence_reg *fence, i915_gem_object_get_tiling(vma->obj))) return -EINVAL; - ret = i915_active_wait(&vma->active); + ret = i915_vma_sync(vma); if (ret) return ret; } old = xchg(&fence->vma, NULL); if (old) { - ret = i915_active_wait(&old->active); + /* XXX Ideally we would move the waiting to outside the mutex */ + ret = i915_vma_sync(old); if (ret) { fence->vma = old; return ret; @@ -331,13 +332,15 @@ static struct i915_fence_reg *fence_find(struct drm_i915_private *i915) return ERR_PTR(-EDEADLK); } -static int __i915_vma_pin_fence(struct i915_vma *vma) +int __i915_vma_pin_fence(struct i915_vma *vma) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm); struct i915_fence_reg *fence; struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL; int err; + lockdep_assert_held(&vma->vm->mutex); + /* Just update our place in the LRU if our fence is getting reused. */ if (vma->fence) { fence = vma->fence; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 8eba63ecdb03c..55cebf256d033 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -150,16 +150,18 @@ static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt) static int ppgtt_bind_vma(struct i915_vma *vma, enum i915_cache_level cache_level, - u32 unused) + u32 flags) { u32 pte_flags; int err; - if (!i915_vma_is_bound(vma, I915_VMA_LOCAL_BIND)) { + if (flags & I915_VMA_ALLOC) { err = vma->vm->allocate_va_range(vma->vm, vma->node.start, vma->size); if (err) return err; + + set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)); } /* Applicable to VLV, and gen8+ */ @@ -167,6 +169,7 @@ static int ppgtt_bind_vma(struct i915_vma *vma, if (i915_gem_object_is_readonly(vma->obj)) pte_flags |= PTE_READ_ONLY; + GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))); vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); wmb(); @@ -175,7 +178,8 @@ static int ppgtt_bind_vma(struct i915_vma *vma, static void ppgtt_unbind_vma(struct i915_vma *vma) { - vma->vm->clear_range(vma->vm, vma->node.start, vma->size); + if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) + vma->vm->clear_range(vma->vm, vma->node.start, vma->size); } static int ppgtt_set_pages(struct i915_vma *vma) @@ -503,15 +507,26 @@ static void i915_address_space_fini(struct i915_address_space *vm) mutex_destroy(&vm->mutex); } -static void ppgtt_destroy_vma(struct i915_address_space *vm) +void __i915_vm_close(struct i915_address_space *vm) { struct i915_vma *vma, *vn; - mutex_lock(&vm->i915->drm.struct_mutex); - list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) + mutex_lock(&vm->mutex); + list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) { + struct drm_i915_gem_object *obj = vma->obj; + + /* Keep the obj (and hence the vma) alive as _we_ destroy it */ + if (!kref_get_unless_zero(&obj->base.refcount)) + continue; + + atomic_and(~I915_VMA_PIN_MASK, &vma->flags); + WARN_ON(__i915_vma_unbind(vma)); i915_vma_destroy(vma); + + i915_gem_object_put(obj); + } GEM_BUG_ON(!list_empty(&vm->bound_list)); - mutex_unlock(&vm->i915->drm.struct_mutex); + mutex_unlock(&vm->mutex); } static void __i915_vm_release(struct work_struct *work) @@ -519,8 +534,6 @@ static void __i915_vm_release(struct work_struct *work) struct i915_address_space *vm = container_of(work, struct i915_address_space, rcu.work); - ppgtt_destroy_vma(vm); - vm->cleanup(vm); i915_address_space_fini(vm); @@ -535,7 +548,6 @@ void i915_vm_release(struct kref *kref) GEM_BUG_ON(i915_is_ggtt(vm)); trace_i915_ppgtt_release(vm); - vm->closed = true; queue_rcu_work(vm->i915->wq, &vm->rcu); } @@ -543,6 +555,7 @@ static void i915_address_space_init(struct i915_address_space *vm, int subclass) { kref_init(&vm->ref); INIT_RCU_WORK(&vm->rcu, __i915_vm_release); + atomic_set(&vm->open, 1); /* * The vm->mutex must be reclaim safe (for use in the shrinker). @@ -1771,12 +1784,8 @@ static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt) static void gen6_ppgtt_cleanup(struct i915_address_space *vm) { struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); - struct drm_i915_private *i915 = vm->i915; - /* FIXME remove the struct_mutex to bring the locking under control */ - mutex_lock(&i915->drm.struct_mutex); i915_vma_destroy(ppgtt->vma); - mutex_unlock(&i915->drm.struct_mutex); gen6_ppgtt_free_pd(ppgtt); free_scratch(vm); @@ -1865,7 +1874,8 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size) i915_active_init(i915, &vma->active, NULL, NULL); - vma->vm = &ggtt->vm; + mutex_init(&vma->pages_mutex); + vma->vm = i915_vm_get(&ggtt->vm); vma->ops = &pd_vma_ops; vma->private = ppgtt; @@ -1885,7 +1895,7 @@ int gen6_ppgtt_pin(struct i915_ppgtt *base) struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); int err = 0; - GEM_BUG_ON(ppgtt->base.vm.closed); + GEM_BUG_ON(!atomic_read(&ppgtt->base.vm.open)); /* * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt @@ -2463,14 +2473,18 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, if (flags & I915_VMA_LOCAL_BIND) { struct i915_ppgtt *alias = i915_vm_to_ggtt(vma->vm)->alias; - if (!i915_vma_is_bound(vma, I915_VMA_LOCAL_BIND)) { + if (flags & I915_VMA_ALLOC) { ret = alias->vm.allocate_va_range(&alias->vm, vma->node.start, vma->size); if (ret) return ret; + + set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)); } + GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT, + __i915_vma_flags(vma))); alias->vm.insert_entries(&alias->vm, vma, cache_level, pte_flags); } @@ -2499,7 +2513,7 @@ static void aliasing_gtt_unbind_vma(struct i915_vma *vma) vm->clear_range(vm, vma->node.start, vma->size); } - if (i915_vma_is_bound(vma, I915_VMA_LOCAL_BIND)) { + if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) { struct i915_address_space *vm = &i915_vm_to_ggtt(vma->vm)->alias->vm; @@ -2602,22 +2616,16 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt) static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt) { - struct drm_i915_private *i915 = ggtt->vm.i915; struct i915_ppgtt *ppgtt; - mutex_lock(&i915->drm.struct_mutex); - ppgtt = fetch_and_zero(&ggtt->alias); if (!ppgtt) - goto out; + return; i915_vm_put(&ppgtt->vm); ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma; ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma; - -out: - mutex_unlock(&i915->drm.struct_mutex); } static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt) @@ -2734,32 +2742,28 @@ int i915_init_ggtt(struct drm_i915_private *i915) static void ggtt_cleanup_hw(struct i915_ggtt *ggtt) { - struct drm_i915_private *i915 = ggtt->vm.i915; struct i915_vma *vma, *vn; - ggtt->vm.closed = true; + atomic_set(&ggtt->vm.open, 0); rcu_barrier(); /* flush the RCU'ed__i915_vm_release */ - flush_workqueue(i915->wq); + flush_workqueue(ggtt->vm.i915->wq); - mutex_lock(&i915->drm.struct_mutex); + mutex_lock(&ggtt->vm.mutex); list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) - WARN_ON(i915_vma_unbind(vma)); + WARN_ON(__i915_vma_unbind(vma)); if (drm_mm_node_allocated(&ggtt->error_capture)) drm_mm_remove_node(&ggtt->error_capture); ggtt_release_guc_top(ggtt); - - if (drm_mm_initialized(&ggtt->vm.mm)) { - intel_vgt_deballoon(ggtt); - i915_address_space_fini(&ggtt->vm); - } + intel_vgt_deballoon(ggtt); ggtt->vm.cleanup(&ggtt->vm); - mutex_unlock(&i915->drm.struct_mutex); + mutex_unlock(&ggtt->vm.mutex); + i915_address_space_fini(&ggtt->vm); arch_phys_wc_del(ggtt->mtrr); io_mapping_fini(&ggtt->iomap); @@ -3188,9 +3192,6 @@ int i915_ggtt_probe_hw(struct drm_i915_private *i915) static int ggtt_init_hw(struct i915_ggtt *ggtt) { struct drm_i915_private *i915 = ggtt->vm.i915; - int ret = 0; - - mutex_lock(&i915->drm.struct_mutex); i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT); @@ -3206,18 +3207,14 @@ static int ggtt_init_hw(struct i915_ggtt *ggtt) ggtt->gmadr.start, ggtt->mappable_end)) { ggtt->vm.cleanup(&ggtt->vm); - ret = -EIO; - goto out; + return -EIO; } ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start, ggtt->mappable_end); i915_ggtt_init_fences(ggtt); -out: - mutex_unlock(&i915->drm.struct_mutex); - - return ret; + return 0; } /** @@ -3289,6 +3286,7 @@ static void ggtt_restore_mappings(struct i915_ggtt *ggtt) { struct i915_vma *vma, *vn; bool flush = false; + int open; intel_gt_check_and_clear_faults(ggtt->vm.gt); @@ -3296,7 +3294,9 @@ static void ggtt_restore_mappings(struct i915_ggtt *ggtt) /* First fill our portion of the GTT with scratch pages */ ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total); - ggtt->vm.closed = true; /* skip rewriting PTE on VMA unbind */ + + /* Skip rewriting PTE on VMA unbind. */ + open = atomic_xchg(&ggtt->vm.open, 0); /* clflush objects bound into the GGTT and rebind them. */ list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) { @@ -3305,24 +3305,20 @@ static void ggtt_restore_mappings(struct i915_ggtt *ggtt) if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) continue; - mutex_unlock(&ggtt->vm.mutex); - - if (!i915_vma_unbind(vma)) - goto lock; + if (!__i915_vma_unbind(vma)) + continue; + clear_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma)); WARN_ON(i915_vma_bind(vma, obj ? obj->cache_level : 0, - PIN_UPDATE)); + PIN_GLOBAL, NULL)); if (obj) { /* only used during resume => exclusive access */ flush |= fetch_and_zero(&obj->write_domain); obj->read_domains |= I915_GEM_DOMAIN_GTT; } - -lock: - mutex_lock(&ggtt->vm.mutex); } - ggtt->vm.closed = false; + atomic_set(&ggtt->vm.open, open); ggtt->invalidate(ggtt); mutex_unlock(&ggtt->vm.mutex); @@ -3714,7 +3710,8 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, u64 offset; int err; - lockdep_assert_held(&vm->i915->drm.struct_mutex); + lockdep_assert_held(&vm->mutex); + GEM_BUG_ON(!size); GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); GEM_BUG_ON(alignment && !is_power_of_2(alignment)); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 3502b9c85a8ef..0a18fdfe63ff0 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -307,7 +307,14 @@ struct i915_address_space { unsigned int bind_async_flags; - bool closed; + /* + * Each active user context has its own address space (in full-ppgtt). + * Since the vm may be shared between multiple contexts, we count how + * many contexts keep us "open". Once open hits zero, we are closed + * and do not allow any new attachments, and proceed to shutdown our + * vma and page directories. + */ + atomic_t open; struct mutex mutex; /* protects vma and our lists */ #define VM_CLASS_GGTT 0 @@ -581,6 +588,35 @@ static inline void i915_vm_put(struct i915_address_space *vm) kref_put(&vm->ref, i915_vm_release); } +static inline struct i915_address_space * +i915_vm_open(struct i915_address_space *vm) +{ + GEM_BUG_ON(!atomic_read(&vm->open)); + atomic_inc(&vm->open); + return i915_vm_get(vm); +} + +static inline bool +i915_vm_tryopen(struct i915_address_space *vm) +{ + if (atomic_add_unless(&vm->open, 1, 0)) + return i915_vm_get(vm); + + return false; +} + +void __i915_vm_close(struct i915_address_space *vm); + +static inline void +i915_vm_close(struct i915_address_space *vm) +{ + GEM_BUG_ON(!atomic_read(&vm->open)); + if (atomic_dec_and_test(&vm->open)) + __i915_vm_close(vm); + + i915_vm_put(vm); +} + int gen6_ppgtt_pin(struct i915_ppgtt *base); void gen6_ppgtt_unpin(struct i915_ppgtt *base); void gen6_ppgtt_unpin_all(struct i915_ppgtt *base); @@ -613,10 +649,9 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, #define PIN_OFFSET_BIAS BIT_ULL(6) #define PIN_OFFSET_FIXED BIT_ULL(7) -#define PIN_MBZ BIT_ULL(8) /* I915_VMA_PIN_OVERFLOW */ -#define PIN_GLOBAL BIT_ULL(9) /* I915_VMA_GLOBAL_BIND */ -#define PIN_USER BIT_ULL(10) /* I915_VMA_LOCAL_BIND */ -#define PIN_UPDATE BIT_ULL(11) +#define PIN_UPDATE BIT_ULL(9) +#define PIN_GLOBAL BIT_ULL(10) /* I915_VMA_GLOBAL_BIND */ +#define PIN_USER BIT_ULL(11) /* I915_VMA_LOCAL_BIND */ #define PIN_OFFSET_MASK (-I915_GTT_PAGE_SIZE) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 524f6710b7aa9..80055501eccb5 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1204,15 +1204,10 @@ static int i915_oa_read(struct i915_perf_stream *stream, static struct intel_context *oa_pin_context(struct i915_perf_stream *stream) { struct i915_gem_engines_iter it; - struct drm_i915_private *i915 = stream->dev_priv; struct i915_gem_context *ctx = stream->ctx; struct intel_context *ce; int err; - err = i915_mutex_lock_interruptible(&i915->drm); - if (err) - return ERR_PTR(err); - for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { if (ce->engine->class != RENDER_CLASS) continue; @@ -1229,10 +1224,6 @@ static struct intel_context *oa_pin_context(struct i915_perf_stream *stream) } i915_gem_context_unlock_engines(ctx); - mutex_unlock(&i915->drm.struct_mutex); - if (err) - return ERR_PTR(err); - return stream->pinned_ctx; } @@ -1331,32 +1322,22 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) */ static void oa_put_render_ctx_id(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; struct intel_context *ce; stream->specific_ctx_id = INVALID_CTX_ID; stream->specific_ctx_id_mask = 0; ce = fetch_and_zero(&stream->pinned_ctx); - if (ce) { - mutex_lock(&dev_priv->drm.struct_mutex); + if (ce) intel_context_unpin(ce); - mutex_unlock(&dev_priv->drm.struct_mutex); - } } static void free_oa_buffer(struct i915_perf_stream *stream) { - struct drm_i915_private *i915 = stream->dev_priv; - - mutex_lock(&i915->drm.struct_mutex); - i915_vma_unpin_and_release(&stream->oa_buffer.vma, I915_VMA_RELEASE_MAP); - mutex_unlock(&i915->drm.struct_mutex); - stream->oa_buffer.vaddr = NULL; } @@ -1511,18 +1492,13 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream) if (WARN_ON(stream->oa_buffer.vma)) return -ENODEV; - ret = i915_mutex_lock_interruptible(&dev_priv->drm); - if (ret) - return ret; - BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE); BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M); bo = i915_gem_object_create_shmem(dev_priv, OA_BUFFER_SIZE); if (IS_ERR(bo)) { DRM_ERROR("Failed to allocate OA buffer\n"); - ret = PTR_ERR(bo); - goto unlock; + return PTR_ERR(bo); } i915_gem_object_set_cache_coherency(bo, I915_CACHE_LLC); @@ -1546,7 +1522,7 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream) i915_ggtt_offset(stream->oa_buffer.vma), stream->oa_buffer.vaddr); - goto unlock; + return 0; err_unpin: __i915_vma_unpin(vma); @@ -1557,8 +1533,6 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream) stream->oa_buffer.vaddr = NULL; stream->oa_buffer.vma = NULL; -unlock: - mutex_unlock(&dev_priv->drm.struct_mutex); return ret; } diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index d097f77890ba8..fe91a0e47b88d 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -32,6 +32,7 @@ #include "i915_drv.h" #include "i915_globals.h" +#include "i915_sw_fence_work.h" #include "i915_trace.h" #include "i915_vma.h" @@ -110,7 +111,8 @@ vma_create(struct drm_i915_gem_object *obj, if (vma == NULL) return ERR_PTR(-ENOMEM); - vma->vm = vm; + mutex_init(&vma->pages_mutex); + vma->vm = i915_vm_get(vm); vma->ops = &vm->vma_ops; vma->obj = obj; vma->resv = obj->base.resv; @@ -261,8 +263,6 @@ vma_lookup(struct drm_i915_gem_object *obj, * Once created, the VMA is kept until either the object is freed, or the * address space is closed. * - * Must be called with struct_mutex held. - * * Returns the vma, or an error pointer. */ struct i915_vma * @@ -273,7 +273,7 @@ i915_vma_instance(struct drm_i915_gem_object *obj, struct i915_vma *vma; GEM_BUG_ON(view && !i915_is_ggtt(vm)); - GEM_BUG_ON(vm->closed); + GEM_BUG_ON(!atomic_read(&vm->open)); spin_lock(&obj->vma.lock); vma = vma_lookup(obj, vm, view); @@ -287,18 +287,63 @@ i915_vma_instance(struct drm_i915_gem_object *obj, return vma; } +struct i915_vma_work { + struct dma_fence_work base; + struct i915_vma *vma; + enum i915_cache_level cache_level; + unsigned int flags; +}; + +static int __vma_bind(struct dma_fence_work *work) +{ + struct i915_vma_work *vw = container_of(work, typeof(*vw), base); + struct i915_vma *vma = vw->vma; + int err; + + err = vma->ops->bind_vma(vma, vw->cache_level, vw->flags); + if (err) + atomic_or(I915_VMA_ERROR, &vma->flags); + + if (vma->obj) + __i915_gem_object_unpin_pages(vma->obj); + + return err; +} + +static const struct dma_fence_work_ops bind_ops = { + .name = "bind", + .work = __vma_bind, +}; + +struct i915_vma_work *i915_vma_work(void) +{ + struct i915_vma_work *vw; + + vw = kzalloc(sizeof(*vw), GFP_KERNEL); + if (!vw) + return NULL; + + dma_fence_work_init(&vw->base, &bind_ops); + vw->base.dma.error = -EAGAIN; /* disable the worker by default */ + + return vw; +} + /** * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space. * @vma: VMA to map * @cache_level: mapping cache level * @flags: flags like global or local mapping + * @work: preallocated worker for allocating and binding the PTE * * DMA addresses are taken from the scatter-gather table of this object (or of * this VMA in case of non-default GGTT views) and PTE entries set up. * Note that DMA addresses are also the only part of the SG table we care about. */ -int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, - u32 flags) +int i915_vma_bind(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags, + struct i915_vma_work *work) { u32 bind_flags; u32 vma_flags; @@ -315,11 +360,8 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, if (GEM_DEBUG_WARN_ON(!flags)) return -EINVAL; - bind_flags = 0; - if (flags & PIN_GLOBAL) - bind_flags |= I915_VMA_GLOBAL_BIND; - if (flags & PIN_USER) - bind_flags |= I915_VMA_LOCAL_BIND; + bind_flags = flags; + bind_flags &= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; vma_flags = atomic_read(&vma->flags); vma_flags &= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; @@ -333,9 +375,32 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, GEM_BUG_ON(!vma->pages); trace_i915_vma_bind(vma, bind_flags); - ret = vma->ops->bind_vma(vma, cache_level, bind_flags); - if (ret) - return ret; + if (work && (bind_flags & ~vma_flags) & vma->vm->bind_async_flags) { + work->vma = vma; + work->cache_level = cache_level; + work->flags = bind_flags | I915_VMA_ALLOC; + + /* + * Note we only want to chain up to the migration fence on + * the pages (not the object itself). As we don't track that, + * yet, we have to use the exclusive fence instead. + * + * Also note that we do not want to track the async vma as + * part of the obj->resv->excl_fence as it only affects + * execution and not content or object's backing store lifetime. + */ + GEM_BUG_ON(i915_active_has_exclusive(&vma->active)); + i915_active_set_exclusive(&vma->active, &work->base.dma); + work->base.dma.error = 0; /* enable the queue_work() */ + + if (vma->obj) + __i915_gem_object_pin_pages(vma->obj); + } else { + GEM_BUG_ON((bind_flags & ~vma_flags) & vma->vm->bind_async_flags); + ret = vma->ops->bind_vma(vma, cache_level, bind_flags); + if (ret) + return ret; + } atomic_or(bind_flags, &vma->flags); return 0; @@ -348,9 +413,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) /* Access through the GTT requires the device to be awake. */ assert_rpm_wakelock_held(&vma->vm->i915->runtime_pm); - - lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); - if (WARN_ON(!i915_vma_is_map_and_fenceable(vma))) { + if (GEM_WARN_ON(!i915_vma_is_map_and_fenceable(vma))) { err = -ENODEV; goto err; } @@ -358,7 +421,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) GEM_BUG_ON(!i915_vma_is_ggtt(vma)); GEM_BUG_ON(!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)); - ptr = vma->iomap; + ptr = READ_ONCE(vma->iomap); if (ptr == NULL) { ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->iomap, vma->node.start, @@ -368,7 +431,10 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) goto err; } - vma->iomap = ptr; + if (unlikely(cmpxchg(&vma->iomap, NULL, ptr))) { + io_mapping_unmap(ptr); + ptr = vma->iomap; + } } __i915_vma_pin(vma); @@ -388,18 +454,12 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) void i915_vma_flush_writes(struct i915_vma *vma) { - if (!i915_vma_has_ggtt_write(vma)) - return; - - intel_gt_flush_ggtt_writes(vma->vm->gt); - - i915_vma_unset_ggtt_write(vma); + if (i915_vma_unset_ggtt_write(vma)) + intel_gt_flush_ggtt_writes(vma->vm->gt); } void i915_vma_unpin_iomap(struct i915_vma *vma) { - lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); - GEM_BUG_ON(vma->iomap == NULL); i915_vma_flush_writes(vma); @@ -435,6 +495,9 @@ bool i915_vma_misplaced(const struct i915_vma *vma, if (!drm_mm_node_allocated(&vma->node)) return false; + if (test_bit(I915_VMA_ERROR_BIT, __i915_vma_flags(vma))) + return true; + if (vma->node.size < size) return true; @@ -535,7 +598,6 @@ static void assert_bind_count(const struct drm_i915_gem_object *obj) static int i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) { - struct drm_i915_private *dev_priv = vma->vm->i915; unsigned long color; u64 start, end; int ret; @@ -561,7 +623,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) end = vma->vm->total; if (flags & PIN_MAPPABLE) - end = min_t(u64, end, dev_priv->ggtt.mappable_end); + end = min_t(u64, end, i915_vm_to_ggtt(vma->vm)->mappable_end); if (flags & PIN_ZONE_4G) end = min_t(u64, end, (1ULL << 32) - I915_GTT_PAGE_SIZE); GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE)); @@ -578,34 +640,20 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) } color = 0; - if (vma->obj) { - ret = i915_gem_object_pin_pages(vma->obj); - if (ret) - return ret; - - if (i915_vm_has_cache_coloring(vma->vm)) - color = vma->obj->cache_level; - } - - GEM_BUG_ON(vma->pages); - - ret = vma->ops->set_pages(vma); - if (ret) - goto err_unpin; + if (vma->obj && i915_vm_has_cache_coloring(vma->vm)) + color = vma->obj->cache_level; if (flags & PIN_OFFSET_FIXED) { u64 offset = flags & PIN_OFFSET_MASK; if (!IS_ALIGNED(offset, alignment) || - range_overflows(offset, size, end)) { - ret = -EINVAL; - goto err_clear; - } + range_overflows(offset, size, end)) + return -EINVAL; ret = i915_gem_gtt_reserve(vma->vm, &vma->node, size, offset, color, flags); if (ret) - goto err_clear; + return ret; } else { /* * We only support huge gtt pages through the 48b PPGTT, @@ -644,7 +692,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) size, alignment, color, start, end, flags); if (ret) - goto err_clear; + return ret; GEM_BUG_ON(vma->node.start < start); GEM_BUG_ON(vma->node.start + vma->node.size > end); @@ -652,23 +700,15 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, color)); - mutex_lock(&vma->vm->mutex); list_add_tail(&vma->vm_link, &vma->vm->bound_list); - mutex_unlock(&vma->vm->mutex); if (vma->obj) { + atomic_inc(&vma->obj->mm.pages_pin_count); atomic_inc(&vma->obj->bind_count); assert_bind_count(vma->obj); } return 0; - -err_clear: - vma->ops->clear_pages(vma); -err_unpin: - if (vma->obj) - i915_gem_object_unpin_pages(vma->obj); - return ret; } static void @@ -677,12 +717,7 @@ i915_vma_remove(struct i915_vma *vma) GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); - vma->ops->clear_pages(vma); - - mutex_lock(&vma->vm->mutex); - drm_mm_remove_node(&vma->node); list_del(&vma->vm_link); - mutex_unlock(&vma->vm->mutex); /* * Since the unbound list is global, only move to that list if @@ -701,51 +736,211 @@ i915_vma_remove(struct i915_vma *vma) i915_gem_object_unpin_pages(obj); assert_bind_count(obj); } + + drm_mm_remove_node(&vma->node); } -int __i915_vma_do_pin(struct i915_vma *vma, - u64 size, u64 alignment, u64 flags) +static bool try_qad_pin(struct i915_vma *vma, unsigned int flags) { - const unsigned int bound = atomic_read(&vma->flags); - int ret; + unsigned int bound; + bool pinned = true; - lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); - GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0); - GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma)); + bound = atomic_read(&vma->flags); + do { + if (unlikely(flags & ~bound)) + return false; - if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) { - ret = -EBUSY; - goto err_unpin; + if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR))) + return false; + + if (!(bound & I915_VMA_PIN_MASK)) + goto unpinned; + + GEM_BUG_ON(((bound + 1) & I915_VMA_PIN_MASK) == 0); + } while (!atomic_try_cmpxchg(&vma->flags, &bound, bound + 1)); + + return true; + +unpinned: + /* + * If pin_count==0, but we are bound, check under the lock to avoid + * racing with a concurrent i915_vma_unbind(). + */ + mutex_lock(&vma->vm->mutex); + do { + if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR))) { + pinned = false; + break; + } + + if (unlikely(flags & ~bound)) { + pinned = false; + break; + } + } while (!atomic_try_cmpxchg(&vma->flags, &bound, bound + 1)); + mutex_unlock(&vma->vm->mutex); + + return pinned; +} + +static int vma_get_pages(struct i915_vma *vma) +{ + int err = 0; + + if (atomic_add_unless(&vma->pages_count, 1, 0)) + return 0; + + /* Allocations ahoy! */ + if (mutex_lock_interruptible(&vma->pages_mutex)) + return -EINTR; + + if (!atomic_read(&vma->pages_count)) { + if (vma->obj) { + err = i915_gem_object_pin_pages(vma->obj); + if (err) + goto unlock; + } + + err = vma->ops->set_pages(vma); + if (err) + goto unlock; } + atomic_inc(&vma->pages_count); - if ((bound & I915_VMA_BIND_MASK) == 0) { - ret = i915_vma_insert(vma, size, alignment, flags); - if (ret) - goto err_unpin; +unlock: + mutex_unlock(&vma->pages_mutex); + + return err; +} + +static void __vma_put_pages(struct i915_vma *vma, unsigned int count) +{ + /* We allocate under vma_get_pages, so beware the shrinker */ + mutex_lock_nested(&vma->pages_mutex, SINGLE_DEPTH_NESTING); + GEM_BUG_ON(atomic_read(&vma->pages_count) < count); + if (atomic_sub_return(count, &vma->pages_count) == 0) { + vma->ops->clear_pages(vma); + GEM_BUG_ON(vma->pages); + if (vma->obj) + i915_gem_object_unpin_pages(vma->obj); } - GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + mutex_unlock(&vma->pages_mutex); +} - ret = i915_vma_bind(vma, vma->obj ? vma->obj->cache_level : 0, flags); - if (ret) - goto err_remove; +static void vma_put_pages(struct i915_vma *vma) +{ + if (atomic_add_unless(&vma->pages_count, -1, 1)) + return; + + __vma_put_pages(vma, 1); +} + +static void vma_unbind_pages(struct i915_vma *vma) +{ + unsigned int count; + + lockdep_assert_held(&vma->vm->mutex); + + /* The upper portion of pages_count is the number of bindings */ + count = atomic_read(&vma->pages_count); + count >>= I915_VMA_PAGES_BIAS; + GEM_BUG_ON(!count); + + __vma_put_pages(vma, count | count << I915_VMA_PAGES_BIAS); +} + +int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) +{ + struct i915_vma_work *work = NULL; + unsigned int bound; + int err; + + BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND); + BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND); + + GEM_BUG_ON(flags & PIN_UPDATE); + GEM_BUG_ON(!(flags & (PIN_USER | PIN_GLOBAL))); + + /* First try and grab the pin without rebinding the vma */ + if (try_qad_pin(vma, flags & I915_VMA_BIND_MASK)) + return 0; + + err = vma_get_pages(vma); + if (err) + return err; + + if (flags & vma->vm->bind_async_flags) { + work = i915_vma_work(); + if (!work) { + err = -ENOMEM; + goto err_pages; + } + } + + /* No more allocations allowed once we hold vm->mutex */ + err = mutex_lock_interruptible(&vma->vm->mutex); + if (err) + goto err_fence; + + bound = atomic_read(&vma->flags); + if (unlikely(bound & I915_VMA_ERROR)) { + err = -ENOMEM; + goto err_unlock; + } + + if (unlikely(!((bound + 1) & I915_VMA_PIN_MASK))) { + err = -EAGAIN; /* pins are meant to be fairly temporary */ + goto err_unlock; + } + + if (unlikely(!(flags & ~bound & I915_VMA_BIND_MASK))) { + __i915_vma_pin(vma); + goto err_unlock; + } + + err = i915_active_acquire(&vma->active); + if (err) + goto err_unlock; + + if (!(bound & I915_VMA_BIND_MASK)) { + err = i915_vma_insert(vma, size, alignment, flags); + if (err) + goto err_active; + + if (i915_is_ggtt(vma->vm)) + __i915_vma_set_map_and_fenceable(vma); + } - GEM_BUG_ON(!i915_vma_is_bound(vma, I915_VMA_BIND_MASK)); + GEM_BUG_ON(!vma->pages); + err = i915_vma_bind(vma, + vma->obj ? vma->obj->cache_level : 0, + flags, work); + if (err) + goto err_remove; - if ((bound ^ atomic_read(&vma->flags)) & I915_VMA_GLOBAL_BIND) - __i915_vma_set_map_and_fenceable(vma); + /* There should only be at most 2 active bindings (user, global) */ + GEM_BUG_ON(bound + I915_VMA_PAGES_ACTIVE < bound); + atomic_add(I915_VMA_PAGES_ACTIVE, &vma->pages_count); + list_move_tail(&vma->vm_link, &vma->vm->bound_list); + __i915_vma_pin(vma); + GEM_BUG_ON(!i915_vma_is_pinned(vma)); + GEM_BUG_ON(!i915_vma_is_bound(vma, flags)); GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); - return 0; err_remove: - if ((bound & I915_VMA_BIND_MASK) == 0) { + if (!i915_vma_is_bound(vma, I915_VMA_BIND_MASK)) i915_vma_remove(vma); - GEM_BUG_ON(vma->pages); - GEM_BUG_ON(atomic_read(&vma->flags) & I915_VMA_BIND_MASK); - } -err_unpin: - __i915_vma_unpin(vma); - return ret; +err_active: + i915_active_release(&vma->active); +err_unlock: + mutex_unlock(&vma->vm->mutex); +err_fence: + if (work) + dma_fence_work_commit(&work->base); +err_pages: + vma_put_pages(vma); + return err; } void i915_vma_close(struct i915_vma *vma) @@ -776,9 +971,6 @@ static void __i915_vma_remove_closed(struct i915_vma *vma) { struct drm_i915_private *i915 = vma->vm->i915; - if (!i915_vma_is_closed(vma)) - return; - spin_lock_irq(&i915->gt.closed_lock); list_del_init(&vma->closed_link); spin_unlock_irq(&i915->gt.closed_lock); @@ -786,40 +978,35 @@ static void __i915_vma_remove_closed(struct i915_vma *vma) void i915_vma_reopen(struct i915_vma *vma) { - __i915_vma_remove_closed(vma); + if (i915_vma_is_closed(vma)) + __i915_vma_remove_closed(vma); } -static void __i915_vma_destroy(struct i915_vma *vma) +void i915_vma_destroy(struct i915_vma *vma) { - GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); - GEM_BUG_ON(vma->fence); + if (drm_mm_node_allocated(&vma->node)) { + mutex_lock(&vma->vm->mutex); + atomic_and(~I915_VMA_PIN_MASK, &vma->flags); + WARN_ON(__i915_vma_unbind(vma)); + mutex_unlock(&vma->vm->mutex); + GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); + } + GEM_BUG_ON(i915_vma_is_active(vma)); if (vma->obj) { struct drm_i915_gem_object *obj = vma->obj; spin_lock(&obj->vma.lock); list_del(&vma->obj_link); - rb_erase(&vma->obj_node, &vma->obj->vma.tree); + rb_erase(&vma->obj_node, &obj->vma.tree); spin_unlock(&obj->vma.lock); } - i915_active_fini(&vma->active); - - i915_vma_free(vma); -} - -void i915_vma_destroy(struct i915_vma *vma) -{ - lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); - - GEM_BUG_ON(i915_vma_is_pinned(vma)); - __i915_vma_remove_closed(vma); + i915_vm_put(vma->vm); - WARN_ON(i915_vma_unbind(vma)); - GEM_BUG_ON(i915_vma_is_active(vma)); - - __i915_vma_destroy(vma); + i915_active_fini(&vma->active); + i915_vma_free(vma); } void i915_vma_parked(struct drm_i915_private *i915) @@ -828,12 +1015,32 @@ void i915_vma_parked(struct drm_i915_private *i915) spin_lock_irq(&i915->gt.closed_lock); list_for_each_entry_safe(vma, next, &i915->gt.closed_vma, closed_link) { - list_del_init(&vma->closed_link); + struct drm_i915_gem_object *obj = vma->obj; + struct i915_address_space *vm = vma->vm; + + /* XXX All to avoid keeping a reference on i915_vma itself */ + + if (!kref_get_unless_zero(&obj->base.refcount)) + continue; + + if (!i915_vm_tryopen(vm)) { + i915_gem_object_put(obj); + obj = NULL; + } + spin_unlock_irq(&i915->gt.closed_lock); - i915_vma_destroy(vma); + if (obj) { + i915_vma_destroy(vma); + i915_gem_object_put(obj); + } + i915_vm_close(vm); + + /* Restart after dropping lock */ spin_lock_irq(&i915->gt.closed_lock); + next = list_first_entry(&i915->gt.closed_vma, + typeof(*next), closed_link); } spin_unlock_irq(&i915->gt.closed_lock); } @@ -873,6 +1080,20 @@ void i915_vma_revoke_mmap(struct i915_vma *vma) list_del(&vma->obj->userfault_link); } +int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq) +{ + int err; + + GEM_BUG_ON(!i915_vma_is_pinned(vma)); + + /* Wait for the vma to be bound before we start! */ + err = i915_request_await_active(rq, &vma->active); + if (err) + return err; + + return i915_active_add_request(&vma->active, rq); +} + int i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq, unsigned int flags) @@ -880,19 +1101,9 @@ int i915_vma_move_to_active(struct i915_vma *vma, struct drm_i915_gem_object *obj = vma->obj; int err; - assert_vma_held(vma); assert_object_held(obj); - GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); - /* - * Add a reference if we're newly entering the active list. - * The order in which we add operations to the retirement queue is - * vital here: mark_active adds to the start of the callback list, - * such that subsequent callbacks are called first. Therefore we - * add the active reference first and queue for it to be dropped - * *last*. - */ - err = i915_active_add_request(&vma->active, rq); + err = __i915_vma_move_to_active(vma, rq); if (unlikely(err)) return err; @@ -918,38 +1129,23 @@ int i915_vma_move_to_active(struct i915_vma *vma, return 0; } -int i915_vma_unbind(struct i915_vma *vma) +int __i915_vma_unbind(struct i915_vma *vma) { int ret; - lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); + lockdep_assert_held(&vma->vm->mutex); /* * First wait upon any activity as retiring the request may * have side-effects such as unpinning or even unbinding this vma. + * + * XXX Actually waiting under the vm->mutex is a hinderance and + * should be pipelined wherever possible. In cases where that is + * unavoidable, we should lift the wait to before the mutex. */ - might_sleep(); - if (i915_vma_is_active(vma)) { - /* - * When a closed VMA is retired, it is unbound - eek. - * In order to prevent it from being recursively closed, - * take a pin on the vma so that the second unbind is - * aborted. - * - * Even more scary is that the retire callback may free - * the object (last active vma). To prevent the explosion - * we defer the actual object free to a worker that can - * only proceed once it acquires the struct_mutex (which - * we currently hold, therefore it cannot free this object - * before we are finished). - */ - __i915_vma_pin(vma); - ret = i915_active_wait(&vma->active); - __i915_vma_unpin(vma); - if (ret) - return ret; - } - GEM_BUG_ON(i915_vma_is_active(vma)); + ret = i915_vma_sync(vma); + if (ret) + return ret; if (i915_vma_is_pinned(vma)) { vma_print_allocator(vma, "is pinned"); @@ -970,16 +1166,12 @@ int i915_vma_unbind(struct i915_vma *vma) GEM_BUG_ON(i915_vma_has_ggtt_write(vma)); /* release the fence reg _after_ flushing */ - mutex_lock(&vma->vm->mutex); ret = i915_vma_revoke_fence(vma); - mutex_unlock(&vma->vm->mutex); if (ret) return ret; /* Force a pagefault for domain tracking on next user access */ - mutex_lock(&vma->vm->mutex); i915_vma_revoke_mmap(vma); - mutex_unlock(&vma->vm->mutex); __i915_vma_iounmap(vma); clear_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma)); @@ -987,17 +1179,33 @@ int i915_vma_unbind(struct i915_vma *vma) GEM_BUG_ON(vma->fence); GEM_BUG_ON(i915_vma_has_userfault(vma)); - if (likely(!vma->vm->closed)) { + if (likely(atomic_read(&vma->vm->open))) { trace_i915_vma_unbind(vma); vma->ops->unbind_vma(vma); } - atomic_and(~I915_VMA_BIND_MASK, &vma->flags); + atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR), &vma->flags); + vma_unbind_pages(vma); i915_vma_remove(vma); return 0; } +int i915_vma_unbind(struct i915_vma *vma) +{ + struct i915_address_space *vm = vma->vm; + int err; + + err = mutex_lock_interruptible(&vm->mutex); + if (err) + return err; + + err = __i915_vma_unbind(vma); + mutex_unlock(&vm->mutex); + + return err; +} + struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma) { i915_gem_object_make_unshrinkable(vma->obj); diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index e49b199f7de7a..858908e3d1cc5 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -96,25 +96,28 @@ struct i915_vma { * exclusive cachelines of a single page, so a maximum of 64 possible * users. */ -#define I915_VMA_PIN_MASK 0xff -#define I915_VMA_PIN_OVERFLOW_BIT 8 -#define I915_VMA_PIN_OVERFLOW ((int)BIT(I915_VMA_PIN_OVERFLOW_BIT)) +#define I915_VMA_PIN_MASK 0x3ff +#define I915_VMA_OVERFLOW 0x200 /** Flags and address space this VMA is bound to */ -#define I915_VMA_GLOBAL_BIND_BIT 9 -#define I915_VMA_LOCAL_BIND_BIT 10 +#define I915_VMA_GLOBAL_BIND_BIT 10 +#define I915_VMA_LOCAL_BIND_BIT 11 #define I915_VMA_GLOBAL_BIND ((int)BIT(I915_VMA_GLOBAL_BIND_BIT)) #define I915_VMA_LOCAL_BIND ((int)BIT(I915_VMA_LOCAL_BIND_BIT)) -#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | \ - I915_VMA_LOCAL_BIND | \ - I915_VMA_PIN_OVERFLOW) +#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND) -#define I915_VMA_GGTT_BIT 11 -#define I915_VMA_CAN_FENCE_BIT 12 -#define I915_VMA_USERFAULT_BIT 13 -#define I915_VMA_GGTT_WRITE_BIT 14 +#define I915_VMA_ALLOC_BIT 12 +#define I915_VMA_ALLOC ((int)BIT(I915_VMA_ALLOC_BIT)) + +#define I915_VMA_ERROR_BIT 13 +#define I915_VMA_ERROR ((int)BIT(I915_VMA_ERROR_BIT)) + +#define I915_VMA_GGTT_BIT 14 +#define I915_VMA_CAN_FENCE_BIT 15 +#define I915_VMA_USERFAULT_BIT 16 +#define I915_VMA_GGTT_WRITE_BIT 17 #define I915_VMA_GGTT ((int)BIT(I915_VMA_GGTT_BIT)) #define I915_VMA_CAN_FENCE ((int)BIT(I915_VMA_CAN_FENCE_BIT)) @@ -123,6 +126,11 @@ struct i915_vma { struct i915_active active; +#define I915_VMA_PAGES_BIAS 24 +#define I915_VMA_PAGES_ACTIVE (BIT(24) | 1) + atomic_t pages_count; /* number of active binds to the pages */ + struct mutex pages_mutex; /* protect acquire/release of backing pages */ + /** * Support different GGTT views into the same object. * This means there can be multiple VMA mappings per object and per VM. @@ -169,6 +177,8 @@ static inline bool i915_vma_is_active(const struct i915_vma *vma) return !i915_active_is_idle(&vma->active); } +int __must_check __i915_vma_move_to_active(struct i915_vma *vma, + struct i915_request *rq); int __must_check i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq, unsigned int flags); @@ -307,13 +317,18 @@ i915_vma_compare(struct i915_vma *vma, return memcmp(&vma->ggtt_view.partial, &view->partial, view->type); } -int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, - u32 flags); +struct i915_vma_work *i915_vma_work(void); +int i915_vma_bind(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags, + struct i915_vma_work *work); + bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color); bool i915_vma_misplaced(const struct i915_vma *vma, u64 size, u64 alignment, u64 flags); void __i915_vma_set_map_and_fenceable(struct i915_vma *vma); void i915_vma_revoke_mmap(struct i915_vma *vma); +int __i915_vma_unbind(struct i915_vma *vma); int __must_check i915_vma_unbind(struct i915_vma *vma); void i915_vma_unlink_ctx(struct i915_vma *vma); void i915_vma_close(struct i915_vma *vma); @@ -332,26 +347,8 @@ static inline void i915_vma_unlock(struct i915_vma *vma) dma_resv_unlock(vma->resv); } -int __i915_vma_do_pin(struct i915_vma *vma, - u64 size, u64 alignment, u64 flags); -static inline int __must_check -i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) -{ - BUILD_BUG_ON(PIN_MBZ != I915_VMA_PIN_OVERFLOW); - BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND); - BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND); - - /* Pin early to prevent the shrinker/eviction logic from destroying - * our vma as we insert and bind. - */ - if (likely(((atomic_inc_return(&vma->flags) ^ flags) & I915_VMA_BIND_MASK) == 0)) { - GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); - GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); - return 0; - } - - return __i915_vma_do_pin(vma, size, alignment, flags); -} +int __must_check +i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags); static inline int i915_vma_pin_count(const struct i915_vma *vma) { @@ -366,17 +363,17 @@ static inline bool i915_vma_is_pinned(const struct i915_vma *vma) static inline void __i915_vma_pin(struct i915_vma *vma) { atomic_inc(&vma->flags); - GEM_BUG_ON(atomic_read(&vma->flags) & I915_VMA_PIN_OVERFLOW); + GEM_BUG_ON(!i915_vma_is_pinned(vma)); } static inline void __i915_vma_unpin(struct i915_vma *vma) { + GEM_BUG_ON(!i915_vma_is_pinned(vma)); atomic_dec(&vma->flags); } static inline void i915_vma_unpin(struct i915_vma *vma) { - GEM_BUG_ON(!i915_vma_is_pinned(vma)); GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); __i915_vma_unpin(vma); } @@ -402,8 +399,6 @@ static inline bool i915_node_color_differs(const struct drm_mm_node *node, * the caller must call i915_vma_unpin_iomap to relinquish the pinning * after the iomapping is no longer required. * - * Callers must hold the struct_mutex. - * * Returns a valid iomapped pointer or ERR_PTR. */ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma); @@ -415,8 +410,8 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma); * * Unpins the previously iomapped VMA from i915_vma_pin_iomap(). * - * Callers must hold the struct_mutex. This function is only valid to be - * called on a VMA previously iomapped by the caller with i915_vma_pin_iomap(). + * This function is only valid to be called on a VMA previously + * iomapped by the caller with i915_vma_pin_iomap(). */ void i915_vma_unpin_iomap(struct i915_vma *vma); @@ -444,6 +439,8 @@ static inline struct page *i915_vma_first_page(struct i915_vma *vma) int __must_check i915_vma_pin_fence(struct i915_vma *vma); int __must_check i915_vma_revoke_fence(struct i915_vma *vma); +int __i915_vma_pin_fence(struct i915_vma *vma); + static inline void __i915_vma_unpin_fence(struct i915_vma *vma) { GEM_BUG_ON(atomic_read(&vma->fence->pin_count) <= 0); @@ -461,7 +458,6 @@ static inline void __i915_vma_unpin_fence(struct i915_vma *vma) static inline void i915_vma_unpin_fence(struct i915_vma *vma) { - /* lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); */ if (vma->fence) __i915_vma_unpin_fence(vma); } @@ -490,4 +486,10 @@ struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma); void i915_vma_make_shrinkable(struct i915_vma *vma); void i915_vma_make_purgeable(struct i915_vma *vma); +static inline int i915_vma_sync(struct i915_vma *vma) +{ + /* Wait for the asynchronous bindings and pending GPU reads */ + return i915_active_wait(&vma->active); +} + #endif diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c index 37593831b5394..0346c3e5b6b67 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c @@ -119,10 +119,8 @@ static void pm_resume(struct drm_i915_private *i915) intel_gt_sanitize(&i915->gt, false); i915_gem_sanitize(i915); - mutex_lock(&i915->drm.struct_mutex); i915_gem_restore_gtt_mappings(i915); i915_gem_restore_fences(i915); - mutex_unlock(&i915->drm.struct_mutex); i915_gem_resume(i915); } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index 2905fb21d8663..75a4695b82bb7 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -106,14 +106,11 @@ static int populate_ggtt(struct drm_i915_private *i915, static void unpin_ggtt(struct drm_i915_private *i915) { - struct i915_ggtt *ggtt = &i915->ggtt; struct i915_vma *vma; - mutex_lock(&ggtt->vm.mutex); list_for_each_entry(vma, &i915->ggtt.vm.bound_list, vm_link) if (vma->obj->mm.quirked) i915_vma_unpin(vma); - mutex_unlock(&ggtt->vm.mutex); } static void cleanup_objects(struct drm_i915_private *i915, @@ -127,11 +124,7 @@ static void cleanup_objects(struct drm_i915_private *i915, i915_gem_object_put(obj); } - mutex_unlock(&i915->drm.struct_mutex); - i915_gem_drain_freed_objects(i915); - - mutex_lock(&i915->drm.struct_mutex); } static int igt_evict_something(void *arg) @@ -148,10 +141,12 @@ static int igt_evict_something(void *arg) goto cleanup; /* Everything is pinned, nothing should happen */ + mutex_lock(&ggtt->vm.mutex); err = i915_gem_evict_something(&ggtt->vm, I915_GTT_PAGE_SIZE, 0, 0, 0, U64_MAX, 0); + mutex_unlock(&ggtt->vm.mutex); if (err != -ENOSPC) { pr_err("i915_gem_evict_something failed on a full GGTT with err=%d\n", err); @@ -161,10 +156,12 @@ static int igt_evict_something(void *arg) unpin_ggtt(i915); /* Everything is unpinned, we should be able to evict something */ + mutex_lock(&ggtt->vm.mutex); err = i915_gem_evict_something(&ggtt->vm, I915_GTT_PAGE_SIZE, 0, 0, 0, U64_MAX, 0); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_evict_something failed on a full GGTT with err=%d\n", err); @@ -230,7 +227,9 @@ static int igt_evict_for_vma(void *arg) goto cleanup; /* Everything is pinned, nothing should happen */ + mutex_lock(&ggtt->vm.mutex); err = i915_gem_evict_for_node(&ggtt->vm, &target, 0); + mutex_unlock(&ggtt->vm.mutex); if (err != -ENOSPC) { pr_err("i915_gem_evict_for_node on a full GGTT returned err=%d\n", err); @@ -240,7 +239,9 @@ static int igt_evict_for_vma(void *arg) unpin_ggtt(i915); /* Everything is unpinned, we should be able to evict the node */ + mutex_lock(&ggtt->vm.mutex); err = i915_gem_evict_for_node(&ggtt->vm, &target, 0); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_evict_for_node returned err=%d\n", err); @@ -319,7 +320,9 @@ static int igt_evict_for_cache_color(void *arg) i915_vma_unpin(vma); /* Remove just the second vma */ + mutex_lock(&ggtt->vm.mutex); err = i915_gem_evict_for_node(&ggtt->vm, &target, 0); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("[0]i915_gem_evict_for_node returned err=%d\n", err); goto cleanup; @@ -330,7 +333,9 @@ static int igt_evict_for_cache_color(void *arg) */ target.color = I915_CACHE_L3_LLC; + mutex_lock(&ggtt->vm.mutex); err = i915_gem_evict_for_node(&ggtt->vm, &target, 0); + mutex_unlock(&ggtt->vm.mutex); if (!err) { pr_err("[1]i915_gem_evict_for_node returned err=%d\n", err); err = -EINVAL; @@ -360,7 +365,9 @@ static int igt_evict_vm(void *arg) goto cleanup; /* Everything is pinned, nothing should happen */ + mutex_lock(&ggtt->vm.mutex); err = i915_gem_evict_vm(&ggtt->vm); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n", err); @@ -369,7 +376,9 @@ static int igt_evict_vm(void *arg) unpin_ggtt(i915); + mutex_lock(&ggtt->vm.mutex); err = i915_gem_evict_vm(&ggtt->vm); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n", err); @@ -410,11 +419,11 @@ static int igt_evict_contexts(void *arg) if (!HAS_FULL_PPGTT(i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); wakeref = intel_runtime_pm_get(&i915->runtime_pm); /* Reserve a block so that we know we have enough to fit a few rq */ memset(&hole, 0, sizeof(hole)); + mutex_lock(&i915->ggtt.vm.mutex); err = i915_gem_gtt_insert(&i915->ggtt.vm, &hole, PRETEND_GGTT_SIZE, 0, I915_COLOR_UNEVICTABLE, 0, i915->ggtt.vm.total, @@ -427,7 +436,9 @@ static int igt_evict_contexts(void *arg) do { struct reserved *r; + mutex_unlock(&i915->ggtt.vm.mutex); r = kcalloc(1, sizeof(*r), GFP_KERNEL); + mutex_lock(&i915->ggtt.vm.mutex); if (!r) { err = -ENOMEM; goto out_locked; @@ -447,7 +458,7 @@ static int igt_evict_contexts(void *arg) count++; } while (1); drm_mm_remove_node(&hole); - mutex_unlock(&i915->drm.struct_mutex); + mutex_unlock(&i915->ggtt.vm.mutex); pr_info("Filled GGTT with %lu 1MiB nodes\n", count); /* Overfill the GGTT with context objects and so try to evict one. */ @@ -510,7 +521,7 @@ static int igt_evict_contexts(void *arg) break; } - mutex_lock(&i915->drm.struct_mutex); + mutex_lock(&i915->ggtt.vm.mutex); out_locked: if (igt_flush_test(i915, I915_WAIT_LOCKED)) err = -EIO; @@ -524,8 +535,8 @@ static int igt_evict_contexts(void *arg) } if (drm_mm_node_allocated(&hole)) drm_mm_remove_node(&hole); + mutex_unlock(&i915->ggtt.vm.mutex); intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -547,12 +558,9 @@ int i915_gem_evict_mock_selftests(void) if (!i915) return -ENOMEM; - mutex_lock(&i915->drm.struct_mutex); with_intel_runtime_pm(&i915->runtime_pm, wakeref) err = i915_subtests(tests, i915); - mutex_unlock(&i915->drm.struct_mutex); - drm_dev_put(&i915->drm); return err; } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 0945d6e978a2e..02749bbfd0cfc 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -38,16 +38,7 @@ static void cleanup_freed_objects(struct drm_i915_private *i915) { - /* - * As we may hold onto the struct_mutex for inordinate lengths of - * time, the NMI khungtaskd detector may fire for the free objects - * worker. - */ - mutex_unlock(&i915->drm.struct_mutex); - i915_gem_drain_freed_objects(i915); - - mutex_lock(&i915->drm.struct_mutex); } static void fake_free_pages(struct drm_i915_gem_object *obj, @@ -880,6 +871,15 @@ static int __shrink_hole(struct drm_i915_private *i915, i915_vma_unpin(vma); addr += size; + /* + * Since we are injecting allocation faults at random intervals, + * wait for this allocation to complete before we change the + * faultinjection. + */ + err = i915_vma_sync(vma); + if (err) + break; + if (igt_timeout(end_time, "%s timed out at ofset %llx [%llx - %llx]\n", __func__, addr, hole_start, hole_end)) { @@ -1013,21 +1013,19 @@ static int exercise_ppgtt(struct drm_i915_private *dev_priv, if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&dev_priv->drm.struct_mutex); ppgtt = i915_ppgtt_create(dev_priv); if (IS_ERR(ppgtt)) { err = PTR_ERR(ppgtt); - goto out_unlock; + goto out_free; } GEM_BUG_ON(offset_in_page(ppgtt->vm.total)); - GEM_BUG_ON(ppgtt->vm.closed); + GEM_BUG_ON(!atomic_read(&ppgtt->vm.open)); err = func(dev_priv, &ppgtt->vm, 0, ppgtt->vm.total, end_time); i915_vm_put(&ppgtt->vm); -out_unlock: - mutex_unlock(&dev_priv->drm.struct_mutex); +out_free: mock_file_free(dev_priv, file); return err; } @@ -1090,7 +1088,6 @@ static int exercise_ggtt(struct drm_i915_private *i915, IGT_TIMEOUT(end_time); int err = 0; - mutex_lock(&i915->drm.struct_mutex); restart: list_sort(NULL, &ggtt->vm.mm.hole_stack, sort_holes); drm_mm_for_each_hole(node, &ggtt->vm.mm, hole_start, hole_end) { @@ -1111,7 +1108,6 @@ static int exercise_ggtt(struct drm_i915_private *i915, last = hole_end; goto restart; } - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -1153,13 +1149,9 @@ static int igt_ggtt_page(void *arg) unsigned int *order, n; int err; - mutex_lock(&i915->drm.struct_mutex); - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto out_unlock; - } + if (IS_ERR(obj)) + return PTR_ERR(obj); err = i915_gem_object_pin_pages(obj); if (err) @@ -1227,8 +1219,6 @@ static int igt_ggtt_page(void *arg) i915_gem_object_unpin_pages(obj); out_free: i915_gem_object_put(obj); -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -1239,6 +1229,9 @@ static void track_vma_bind(struct i915_vma *vma) atomic_inc(&obj->bind_count); /* track for eviction later */ __i915_gem_object_pin_pages(obj); + GEM_BUG_ON(vma->pages); + atomic_set(&vma->pages_count, I915_VMA_PAGES_ACTIVE); + __i915_gem_object_pin_pages(obj); vma->pages = obj->mm.pages; mutex_lock(&vma->vm->mutex); @@ -1336,11 +1329,13 @@ static int igt_gtt_reserve(void *arg) goto out; } + mutex_lock(&ggtt->vm.mutex); err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node, obj->base.size, total, obj->cache_level, 0); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_gtt_reserve (pass 1) failed at %llu/%llu with err=%d\n", total, ggtt->vm.total, err); @@ -1386,11 +1381,13 @@ static int igt_gtt_reserve(void *arg) goto out; } + mutex_lock(&ggtt->vm.mutex); err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node, obj->base.size, total, obj->cache_level, 0); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_gtt_reserve (pass 2) failed at %llu/%llu with err=%d\n", total, ggtt->vm.total, err); @@ -1431,11 +1428,13 @@ static int igt_gtt_reserve(void *arg) 2 * I915_GTT_PAGE_SIZE, I915_GTT_MIN_ALIGNMENT); + mutex_lock(&ggtt->vm.mutex); err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node, obj->base.size, offset, obj->cache_level, 0); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_gtt_reserve (pass 3) failed at %llu/%llu with err=%d\n", total, ggtt->vm.total, err); @@ -1504,11 +1503,13 @@ static int igt_gtt_insert(void *arg) /* Check a couple of obviously invalid requests */ for (ii = invalid_insert; ii->size; ii++) { + mutex_lock(&ggtt->vm.mutex); err = i915_gem_gtt_insert(&ggtt->vm, &tmp, ii->size, ii->alignment, I915_COLOR_UNEVICTABLE, ii->start, ii->end, 0); + mutex_unlock(&ggtt->vm.mutex); if (err != -ENOSPC) { pr_err("Invalid i915_gem_gtt_insert(.size=%llx, .alignment=%llx, .start=%llx, .end=%llx) succeeded (err=%d)\n", ii->size, ii->alignment, ii->start, ii->end, @@ -1544,10 +1545,12 @@ static int igt_gtt_insert(void *arg) goto out; } + mutex_lock(&ggtt->vm.mutex); err = i915_gem_gtt_insert(&ggtt->vm, &vma->node, obj->base.size, 0, obj->cache_level, 0, ggtt->vm.total, 0); + mutex_unlock(&ggtt->vm.mutex); if (err == -ENOSPC) { /* maxed out the GGTT space */ i915_gem_object_put(obj); @@ -1602,10 +1605,12 @@ static int igt_gtt_insert(void *arg) goto out; } + mutex_lock(&ggtt->vm.mutex); err = i915_gem_gtt_insert(&ggtt->vm, &vma->node, obj->base.size, 0, obj->cache_level, 0, ggtt->vm.total, 0); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_gtt_insert (pass 2) failed at %llu/%llu with err=%d\n", total, ggtt->vm.total, err); @@ -1649,10 +1654,12 @@ static int igt_gtt_insert(void *arg) goto out; } + mutex_lock(&ggtt->vm.mutex); err = i915_gem_gtt_insert(&ggtt->vm, &vma->node, obj->base.size, 0, obj->cache_level, 0, ggtt->vm.total, 0); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_gtt_insert (pass 3) failed at %llu/%llu with err=%d\n", total, ggtt->vm.total, err); @@ -1696,8 +1703,9 @@ int i915_gem_gtt_mock_selftests(void) } mock_init_ggtt(i915, ggtt); - mutex_lock(&i915->drm.struct_mutex); err = i915_subtests(tests, ggtt); + + mutex_lock(&i915->drm.struct_mutex); mock_device_flush(i915); mutex_unlock(&i915->drm.struct_mutex); diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 57cd4180d06c9..eb175da485475 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -647,8 +647,15 @@ static struct i915_vma *empty_batch(struct drm_i915_private *i915) if (err) goto err; + /* Force the wait wait now to avoid including it in the benchmark */ + err = i915_vma_sync(vma); + if (err) + goto err_pin; + return vma; +err_pin: + i915_vma_unpin(vma); err: i915_gem_object_put(obj); return ERR_PTR(err); diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index 97752deecccbf..0e4f66312b393 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -831,8 +831,9 @@ int i915_vma_mock_selftests(void) } mock_init_ggtt(i915, ggtt); - mutex_lock(&i915->drm.struct_mutex); err = i915_subtests(tests, ggtt); + + mutex_lock(&i915->drm.struct_mutex); mock_device_flush(i915); mutex_unlock(&i915->drm.struct_mutex); @@ -879,8 +880,6 @@ static int igt_vma_remapped_gtt(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); for (t = types; *t; t++) { @@ -976,7 +975,6 @@ static int igt_vma_remapped_gtt(void *arg) out: intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); i915_gem_object_put(obj); return err; From 274cbf20fd108fa26d0497282b102e00371210fd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 4 Oct 2019 14:39:59 +0100 Subject: [PATCH 309/331] drm/i915: Push the i915_active.retire into a worker As we need to use a mutex to serialise i915_active activation (because we want to allow the callback to sleep), we need to push the i915_active.retire into a worker callback in case we get need to retire from an atomic context. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-5-chris@chris-wilson.co.uk --- .../gpu/drm/i915/display/intel_frontbuffer.c | 4 ++- drivers/gpu/drm/i915/gem/i915_gem_context.c | 1 + drivers/gpu/drm/i915/gt/intel_context.c | 2 ++ drivers/gpu/drm/i915/gt/intel_engine_pool.c | 1 + drivers/gpu/drm/i915/gt/intel_timeline.c | 1 + drivers/gpu/drm/i915/i915_active.c | 34 ++++++++++++++++--- drivers/gpu/drm/i915/i915_active_types.h | 13 ++++++- drivers/gpu/drm/i915/i915_vma.c | 2 ++ drivers/gpu/drm/i915/selftests/i915_active.c | 6 ++-- 9 files changed, 55 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c index fc40dc1fdbcc4..6428b8dd70d32 100644 --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c @@ -206,6 +206,7 @@ static int frontbuffer_active(struct i915_active *ref) return 0; } +__i915_active_call static void frontbuffer_retire(struct i915_active *ref) { struct intel_frontbuffer *front = @@ -257,7 +258,8 @@ intel_frontbuffer_get(struct drm_i915_gem_object *obj) kref_init(&front->ref); atomic_set(&front->bits, 0); i915_active_init(i915, &front->write, - frontbuffer_active, frontbuffer_retire); + frontbuffer_active, + i915_active_may_sleep(frontbuffer_retire)); spin_lock(&i915->fb_tracking.lock); if (obj->frontbuffer) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 95f8e66e45db0..4cd7d2ecf1d58 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -848,6 +848,7 @@ struct context_barrier_task { void *data; }; +__i915_active_call static void cb_retire(struct i915_active *base) { struct context_barrier_task *cb = container_of(base, typeof(*cb), base); diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 26cb838c272c6..06fabdf205cf3 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -138,6 +138,7 @@ static void __context_unpin_state(struct i915_vma *vma) __i915_vma_unpin(vma); } +__i915_active_call static void __intel_context_retire(struct i915_active *active) { struct intel_context *ce = container_of(active, typeof(*ce), active); @@ -150,6 +151,7 @@ static void __intel_context_retire(struct i915_active *active) intel_timeline_unpin(ce->timeline); intel_ring_unpin(ce->ring); + intel_context_put(ce); } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.c b/drivers/gpu/drm/i915/gt/intel_engine_pool.c index 97d36cca8ded1..81fab101fdb47 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pool.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.c @@ -61,6 +61,7 @@ static int pool_active(struct i915_active *ref) return 0; } +__i915_active_call static void pool_retire(struct i915_active *ref) { struct intel_engine_pool_node *node = diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 9d436e14ea8df..653f60e78392d 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -136,6 +136,7 @@ static void __idle_cacheline_free(struct intel_timeline_cacheline *cl) kfree(cl); } +__i915_active_call static void __cacheline_retire(struct i915_active *active) { struct intel_timeline_cacheline *cl = diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 0791736a08fd2..7ca066688b98f 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -132,6 +132,7 @@ __active_retire(struct i915_active *ref) bool retire = false; lockdep_assert_held(&ref->mutex); + GEM_BUG_ON(i915_active_is_idle(ref)); /* return the unused nodes to our slabcache -- flushing the allocator */ if (atomic_dec_and_test(&ref->count)) { @@ -157,6 +158,19 @@ __active_retire(struct i915_active *ref) ref->retire(ref); } +static void +active_work(struct work_struct *wrk) +{ + struct i915_active *ref = container_of(wrk, typeof(*ref), work); + + GEM_BUG_ON(!atomic_read(&ref->count)); + if (atomic_add_unless(&ref->count, -1, 1)) + return; + + mutex_lock(&ref->mutex); + __active_retire(ref); +} + static void active_retire(struct i915_active *ref) { @@ -164,8 +178,13 @@ active_retire(struct i915_active *ref) if (atomic_add_unless(&ref->count, -1, 1)) return; - /* One active may be flushed from inside the acquire of another */ - mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING); + /* If we are inside interrupt context (fence signaling), defer */ + if (ref->flags & I915_ACTIVE_RETIRE_SLEEPS || + !mutex_trylock(&ref->mutex)) { + queue_work(system_unbound_wq, &ref->work); + return; + } + __active_retire(ref); } @@ -240,12 +259,16 @@ void __i915_active_init(struct drm_i915_private *i915, void (*retire)(struct i915_active *ref), struct lock_class_key *key) { + unsigned long bits; + debug_active_init(ref); ref->i915 = i915; ref->flags = 0; ref->active = active; - ref->retire = retire; + ref->retire = ptr_unpack_bits(retire, &bits, 2); + if (bits & I915_ACTIVE_MAY_SLEEP) + ref->flags |= I915_ACTIVE_RETIRE_SLEEPS; ref->excl = NULL; ref->tree = RB_ROOT; @@ -253,6 +276,7 @@ void __i915_active_init(struct drm_i915_private *i915, init_llist_head(&ref->preallocated_barriers); atomic_set(&ref->count, 0); __mutex_init(&ref->mutex, "i915_active", key); + INIT_WORK(&ref->work, active_work); } static bool ____active_del_barrier(struct i915_active *ref, @@ -504,6 +528,7 @@ int i915_active_wait(struct i915_active *ref) if (wait_on_bit(&ref->flags, I915_ACTIVE_GRAB_BIT, TASK_KILLABLE)) return -EINTR; + flush_work(&ref->work); if (!i915_active_is_idle(ref)) return -EBUSY; @@ -544,8 +569,9 @@ int i915_request_await_active(struct i915_request *rq, struct i915_active *ref) void i915_active_fini(struct i915_active *ref) { debug_active_fini(ref); - GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree)); GEM_BUG_ON(atomic_read(&ref->count)); + GEM_BUG_ON(work_pending(&ref->work)); + GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree)); mutex_destroy(&ref->mutex); } #endif diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h index 86e7a232ea3c9..021167f0004d6 100644 --- a/drivers/gpu/drm/i915/i915_active_types.h +++ b/drivers/gpu/drm/i915/i915_active_types.h @@ -13,6 +13,9 @@ #include #include #include +#include + +#include "i915_utils.h" struct drm_i915_private; struct i915_active_request; @@ -44,6 +47,11 @@ struct i915_active_request { struct active_node; +#define I915_ACTIVE_MAY_SLEEP BIT(0) + +#define __i915_active_call __aligned(4) +#define i915_active_may_sleep(fn) ptr_pack_bits(&(fn), I915_ACTIVE_MAY_SLEEP, 2) + struct i915_active { struct drm_i915_private *i915; @@ -57,11 +65,14 @@ struct i915_active { struct dma_fence_cb excl_cb; unsigned long flags; -#define I915_ACTIVE_GRAB_BIT 0 +#define I915_ACTIVE_RETIRE_SLEEPS BIT(0) +#define I915_ACTIVE_GRAB_BIT 1 int (*active)(struct i915_active *ref); void (*retire)(struct i915_active *ref); + struct work_struct work; + struct llist_head preallocated_barriers; }; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index fe91a0e47b88d..e191247c7c5f6 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -91,6 +91,7 @@ static int __i915_vma_active(struct i915_active *ref) return i915_vma_tryget(active_to_vma(ref)) ? 0 : -ENOENT; } +__i915_active_call static void __i915_vma_retire(struct i915_active *ref) { i915_vma_put(active_to_vma(ref)); @@ -1152,6 +1153,7 @@ int __i915_vma_unbind(struct i915_vma *vma) return -EBUSY; } + GEM_BUG_ON(i915_vma_is_active(vma)); if (!drm_mm_node_allocated(&vma->node)) return 0; diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c index afecfa081ff4f..a41785822ed9f 100644 --- a/drivers/gpu/drm/i915/selftests/i915_active.c +++ b/drivers/gpu/drm/i915/selftests/i915_active.c @@ -121,7 +121,7 @@ __live_active_setup(struct drm_i915_private *i915) } i915_active_release(&active->base); - if (active->retired && count) { + if (READ_ONCE(active->retired) && count) { pr_err("i915_active retired before submission!\n"); err = -EINVAL; } @@ -161,7 +161,7 @@ static int live_active_wait(void *arg) } i915_active_wait(&active->base); - if (!active->retired) { + if (!READ_ONCE(active->retired)) { pr_err("i915_active not retired after waiting!\n"); err = -EINVAL; } @@ -200,7 +200,7 @@ static int live_active_retire(void *arg) if (igt_flush_test(i915, I915_WAIT_LOCKED)) err = -EIO; - if (!active->retired) { + if (!READ_ONCE(active->retired)) { pr_err("i915_active not retired after flushing!\n"); err = -EINVAL; } From b1e3177bd1d8f41e2a9cc847e56a96cdc0eefe62 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 4 Oct 2019 14:40:00 +0100 Subject: [PATCH 310/331] drm/i915: Coordinate i915_active with its own mutex Forgo the struct_mutex serialisation for i915_active, and interpose its own mutex handling for active/retire. This is a multi-layered sleight-of-hand. First, we had to ensure that no active/retire callbacks accidentally inverted the mutex ordering rules, nor assumed that they were themselves serialised by struct_mutex. More challenging though, is the rule over updating elements of the active rbtree. Instead of the whole i915_active now being serialised by struct_mutex, allocations/rotations of the tree are serialised by the i915_active.mutex and individual nodes are serialised by the caller using the i915_timeline.mutex (we need to use nested spinlocks to interact with the dma_fence callback lists). The pain point here is that instead of a single mutex around execbuf, we now have to take a mutex for active tracker (one for each vma, context, etc) and a couple of spinlocks for each fence update. The improvement in fine grained locking allowing for multiple concurrent clients (eventually!) should be worth it in typical loads. v2: Add some comments that barely elucidate anything :( Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-6-chris@chris-wilson.co.uk --- .../gpu/drm/i915/display/intel_frontbuffer.c | 2 +- drivers/gpu/drm/i915/display/intel_overlay.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_context.c | 4 +- .../gpu/drm/i915/gem/i915_gem_object_types.h | 1 + drivers/gpu/drm/i915/gem/i915_gem_pm.c | 9 +- drivers/gpu/drm/i915/gt/intel_context.c | 4 +- drivers/gpu/drm/i915/gt/intel_engine_pool.c | 2 +- drivers/gpu/drm/i915/gt/intel_reset.c | 10 +- drivers/gpu/drm/i915/gt/intel_timeline.c | 7 +- .../gpu/drm/i915/gt/intel_timeline_types.h | 9 +- drivers/gpu/drm/i915/gt/selftest_context.c | 16 +- drivers/gpu/drm/i915/gt/selftest_lrc.c | 10 +- .../gpu/drm/i915/gt/selftests/mock_timeline.c | 2 +- drivers/gpu/drm/i915/gvt/scheduler.c | 3 - drivers/gpu/drm/i915/i915_active.c | 322 +++++++++--------- drivers/gpu/drm/i915/i915_active.h | 319 ++++------------- drivers/gpu/drm/i915/i915_active_types.h | 23 +- drivers/gpu/drm/i915/i915_gem.c | 42 ++- drivers/gpu/drm/i915/i915_gem_gtt.c | 3 +- drivers/gpu/drm/i915/i915_gpu_error.c | 4 +- drivers/gpu/drm/i915/i915_request.c | 38 +-- drivers/gpu/drm/i915/i915_request.h | 1 - drivers/gpu/drm/i915/i915_vma.c | 4 +- drivers/gpu/drm/i915/selftests/i915_active.c | 32 +- 24 files changed, 298 insertions(+), 572 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c index 6428b8dd70d32..84b164f31895b 100644 --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c @@ -257,7 +257,7 @@ intel_frontbuffer_get(struct drm_i915_gem_object *obj) front->obj = obj; kref_init(&front->ref); atomic_set(&front->bits, 0); - i915_active_init(i915, &front->write, + i915_active_init(&front->write, frontbuffer_active, i915_active_may_sleep(frontbuffer_retire)); diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index 3f4ac1ee76684..e12e1a753af06 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -1360,8 +1360,7 @@ void intel_overlay_setup(struct drm_i915_private *dev_priv) overlay->contrast = 75; overlay->saturation = 146; - i915_active_init(dev_priv, - &overlay->last_flip, + i915_active_init(&overlay->last_flip, NULL, intel_overlay_last_flip_retire); ret = get_registers(overlay, OVERLAY_NEEDS_PHYSICAL(dev_priv)); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 4cd7d2ecf1d58..9d85aab68d34e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -868,20 +868,18 @@ static int context_barrier_task(struct i915_gem_context *ctx, void (*task)(void *data), void *data) { - struct drm_i915_private *i915 = ctx->i915; struct context_barrier_task *cb; struct i915_gem_engines_iter it; struct intel_context *ce; int err = 0; - lockdep_assert_held(&i915->drm.struct_mutex); GEM_BUG_ON(!task); cb = kmalloc(sizeof(*cb), GFP_KERNEL); if (!cb) return -ENOMEM; - i915_active_init(i915, &cb->base, NULL, cb_retire); + i915_active_init(&cb->base, NULL, cb_retire); err = i915_active_acquire(&cb->base); if (err) { kfree(cb); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index e1aab2fd1cd9a..c00b4f077f9ee 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -8,6 +8,7 @@ #define __I915_GEM_OBJECT_TYPES_H__ #include +#include #include "i915_active.h" #include "i915_selftest.h" diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index cca192ecff8b6..0a4115c6c2757 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -16,14 +16,11 @@ static void call_idle_barriers(struct intel_engine_cs *engine) struct llist_node *node, *next; llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) { - struct i915_active_request *active = + struct dma_fence_cb *cb = container_of((struct list_head *)node, - typeof(*active), link); + typeof(*cb), node); - INIT_LIST_HEAD(&active->link); - RCU_INIT_POINTER(active->request, NULL); - - active->retire(active, NULL); + cb->func(NULL, cb); } } diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 06fabdf205cf3..35a40c2820a28 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -240,7 +240,7 @@ intel_context_init(struct intel_context *ce, mutex_init(&ce->pin_mutex); - i915_active_init(ctx->i915, &ce->active, + i915_active_init(&ce->active, __intel_context_active, __intel_context_retire); } @@ -307,7 +307,7 @@ int intel_context_prepare_remote_request(struct intel_context *ce, return err; /* Queue this switch after current activity by this context. */ - err = i915_active_request_set(&tl->last_request, rq); + err = i915_active_fence_set(&tl->last_request, rq); mutex_unlock(&tl->mutex); if (err) return err; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.c b/drivers/gpu/drm/i915/gt/intel_engine_pool.c index 81fab101fdb47..3cdbd5f8b5be2 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pool.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.c @@ -95,7 +95,7 @@ node_create(struct intel_engine_pool *pool, size_t sz) return ERR_PTR(-ENOMEM); node->pool = pool; - i915_active_init(engine->i915, &node->active, pool_active, pool_retire); + i915_active_init(&node->active, pool_active, pool_retire); obj = i915_gem_object_create_internal(engine->i915, sz); if (IS_ERR(obj)) { diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index e189897e87975..055496f0825fc 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -844,10 +844,10 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) */ spin_lock_irqsave(&timelines->lock, flags); list_for_each_entry(tl, &timelines->active_list, link) { - struct i915_request *rq; + struct dma_fence *fence; - rq = i915_active_request_get_unlocked(&tl->last_request); - if (!rq) + fence = i915_active_fence_get(&tl->last_request); + if (!fence) continue; spin_unlock_irqrestore(&timelines->lock, flags); @@ -859,8 +859,8 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) * (I915_FENCE_TIMEOUT) so this wait should not be unbounded * in the worst case. */ - dma_fence_default_wait(&rq->fence, false, MAX_SCHEDULE_TIMEOUT); - i915_request_put(rq); + dma_fence_default_wait(fence, false, MAX_SCHEDULE_TIMEOUT); + dma_fence_put(fence); /* Restart iteration after droping lock */ spin_lock_irqsave(&timelines->lock, flags); diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 653f60e78392d..0f959694303c8 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -178,8 +178,7 @@ cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline) cl->hwsp = hwsp; cl->vaddr = page_pack_bits(vaddr, cacheline); - i915_active_init(hwsp->gt->i915, &cl->active, - __cacheline_active, __cacheline_retire); + i915_active_init(&cl->active, __cacheline_active, __cacheline_retire); return cl; } @@ -255,7 +254,7 @@ int intel_timeline_init(struct intel_timeline *timeline, mutex_init(&timeline->mutex); - INIT_ACTIVE_REQUEST(&timeline->last_request, &timeline->mutex); + INIT_ACTIVE_FENCE(&timeline->last_request, &timeline->mutex); INIT_LIST_HEAD(&timeline->requests); i915_syncmap_init(&timeline->sync); @@ -443,7 +442,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl, * free it after the current request is retired, which ensures that * all writes into the cacheline from previous requests are complete. */ - err = i915_active_ref(&tl->hwsp_cacheline->active, tl, rq); + err = i915_active_ref(&tl->hwsp_cacheline->active, tl, &rq->fence); if (err) goto err_cacheline; diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h index c668c4c50e757..98d9ee1663795 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h @@ -58,12 +58,13 @@ struct intel_timeline { */ struct list_head requests; - /* Contains an RCU guarded pointer to the last request. No reference is + /* + * Contains an RCU guarded pointer to the last request. No reference is * held to the request, users must carefully acquire a reference to - * the request using i915_active_request_get_request_rcu(), or hold the - * struct_mutex. + * the request using i915_active_fence_get(), or manage the RCU + * protection themselves (cf the i915_active_fence API). */ - struct i915_active_request last_request; + struct i915_active_fence last_request; /** * We track the most recent seqno that we wait on in every context so diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c index 4ce1e25433d22..e6bcbe7ab5e11 100644 --- a/drivers/gpu/drm/i915/gt/selftest_context.c +++ b/drivers/gpu/drm/i915/gt/selftest_context.c @@ -47,24 +47,20 @@ static int context_sync(struct intel_context *ce) mutex_lock(&tl->mutex); do { - struct i915_request *rq; + struct dma_fence *fence; long timeout; - rcu_read_lock(); - rq = rcu_dereference(tl->last_request.request); - if (rq) - rq = i915_request_get_rcu(rq); - rcu_read_unlock(); - if (!rq) + fence = i915_active_fence_get(&tl->last_request); + if (!fence) break; - timeout = i915_request_wait(rq, 0, HZ / 10); + timeout = dma_fence_wait_timeout(fence, false, HZ / 10); if (timeout < 0) err = timeout; else - i915_request_retire_upto(rq); + i915_request_retire_upto(to_request(fence)); - i915_request_put(rq); + dma_fence_put(fence); } while (!err); mutex_unlock(&tl->mutex); diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index e7bc2dbbb2a51..dd25636abc5bc 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -1172,9 +1172,13 @@ static struct i915_request *dummy_request(struct intel_engine_cs *engine) if (!rq) return NULL; - INIT_LIST_HEAD(&rq->active_list); rq->engine = engine; + spin_lock_init(&rq->lock); + INIT_LIST_HEAD(&rq->fence.cb_list); + rq->fence.lock = &rq->lock; + rq->fence.ops = &i915_fence_ops; + i915_sched_node_init(&rq->sched); /* mark this request as permanently incomplete */ @@ -1267,8 +1271,8 @@ static int live_suppress_wait_preempt(void *arg) } /* Disable NEWCLIENT promotion */ - __i915_active_request_set(&i915_request_timeline(rq[i])->last_request, - dummy); + __i915_active_fence_set(&i915_request_timeline(rq[i])->last_request, + &dummy->fence); i915_request_add(rq[i]); } diff --git a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c index 598170efcaf60..2a77c051f36a2 100644 --- a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c @@ -15,7 +15,7 @@ void mock_timeline_init(struct intel_timeline *timeline, u64 context) mutex_init(&timeline->mutex); - INIT_ACTIVE_REQUEST(&timeline->last_request, &timeline->mutex); + INIT_ACTIVE_FENCE(&timeline->last_request, &timeline->mutex); INIT_LIST_HEAD(&timeline->requests); i915_syncmap_init(&timeline->sync); diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 1a28e3666951d..7052c90d937f6 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -385,11 +385,8 @@ intel_gvt_workload_req_alloc(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; struct intel_vgpu_submission *s = &vgpu->submission; - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; struct i915_request *rq; - lockdep_assert_held(&dev_priv->drm.struct_mutex); - if (workload->req) return 0; diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 7ca066688b98f..023652ded4be3 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -12,8 +12,6 @@ #include "i915_active.h" #include "i915_globals.h" -#define BKL(ref) (&(ref)->i915->drm.struct_mutex) - /* * Active refs memory management * @@ -27,35 +25,35 @@ static struct i915_global_active { } global; struct active_node { - struct i915_active_request base; + struct i915_active_fence base; struct i915_active *ref; struct rb_node node; u64 timeline; }; static inline struct active_node * -node_from_active(struct i915_active_request *active) +node_from_active(struct i915_active_fence *active) { return container_of(active, struct active_node, base); } #define take_preallocated_barriers(x) llist_del_all(&(x)->preallocated_barriers) -static inline bool is_barrier(const struct i915_active_request *active) +static inline bool is_barrier(const struct i915_active_fence *active) { - return IS_ERR(rcu_access_pointer(active->request)); + return IS_ERR(rcu_access_pointer(active->fence)); } static inline struct llist_node *barrier_to_ll(struct active_node *node) { GEM_BUG_ON(!is_barrier(&node->base)); - return (struct llist_node *)&node->base.link; + return (struct llist_node *)&node->base.cb.node; } static inline struct intel_engine_cs * __barrier_to_engine(struct active_node *node) { - return (struct intel_engine_cs *)READ_ONCE(node->base.link.prev); + return (struct intel_engine_cs *)READ_ONCE(node->base.cb.node.prev); } static inline struct intel_engine_cs * @@ -68,7 +66,7 @@ barrier_to_engine(struct active_node *node) static inline struct active_node *barrier_from_ll(struct llist_node *x) { return container_of((struct list_head *)x, - struct active_node, base.link); + struct active_node, base.cb.node); } #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS) @@ -147,15 +145,18 @@ __active_retire(struct i915_active *ref) if (!retire) return; - GEM_BUG_ON(rcu_access_pointer(ref->excl)); + GEM_BUG_ON(rcu_access_pointer(ref->excl.fence)); rbtree_postorder_for_each_entry_safe(it, n, &root, node) { - GEM_BUG_ON(i915_active_request_isset(&it->base)); + GEM_BUG_ON(i915_active_fence_isset(&it->base)); kmem_cache_free(global.slab_cache, it); } /* After the final retire, the entire struct may be freed */ if (ref->retire) ref->retire(ref); + + /* ... except if you wait on it, you must manage your own references! */ + wake_up_var(ref); } static void @@ -189,12 +190,20 @@ active_retire(struct i915_active *ref) } static void -node_retire(struct i915_active_request *base, struct i915_request *rq) +node_retire(struct dma_fence *fence, struct dma_fence_cb *cb) { - active_retire(node_from_active(base)->ref); + i915_active_fence_cb(fence, cb); + active_retire(container_of(cb, struct active_node, base.cb)->ref); } -static struct i915_active_request * +static void +excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb) +{ + i915_active_fence_cb(fence, cb); + active_retire(container_of(cb, struct i915_active, excl.cb)); +} + +static struct i915_active_fence * active_instance(struct i915_active *ref, struct intel_timeline *tl) { struct active_node *node, *prealloc; @@ -238,7 +247,7 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl) } node = prealloc; - i915_active_request_init(&node->base, &tl->mutex, NULL, node_retire); + __i915_active_fence_init(&node->base, &tl->mutex, NULL, node_retire); node->ref = ref; node->timeline = idx; @@ -253,8 +262,7 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl) return &node->base; } -void __i915_active_init(struct drm_i915_private *i915, - struct i915_active *ref, +void __i915_active_init(struct i915_active *ref, int (*active)(struct i915_active *ref), void (*retire)(struct i915_active *ref), struct lock_class_key *key) @@ -263,19 +271,18 @@ void __i915_active_init(struct drm_i915_private *i915, debug_active_init(ref); - ref->i915 = i915; ref->flags = 0; ref->active = active; ref->retire = ptr_unpack_bits(retire, &bits, 2); if (bits & I915_ACTIVE_MAY_SLEEP) ref->flags |= I915_ACTIVE_RETIRE_SLEEPS; - ref->excl = NULL; ref->tree = RB_ROOT; ref->cache = NULL; init_llist_head(&ref->preallocated_barriers); atomic_set(&ref->count, 0); __mutex_init(&ref->mutex, "i915_active", key); + __i915_active_fence_init(&ref->excl, &ref->mutex, NULL, excl_retire); INIT_WORK(&ref->work, active_work); } @@ -329,9 +336,9 @@ __active_del_barrier(struct i915_active *ref, struct active_node *node) int i915_active_ref(struct i915_active *ref, struct intel_timeline *tl, - struct i915_request *rq) + struct dma_fence *fence) { - struct i915_active_request *active; + struct i915_active_fence *active; int err; lockdep_assert_held(&tl->mutex); @@ -354,66 +361,44 @@ int i915_active_ref(struct i915_active *ref, * request that we want to emit on the kernel_context. */ __active_del_barrier(ref, node_from_active(active)); - RCU_INIT_POINTER(active->request, NULL); - INIT_LIST_HEAD(&active->link); - } else { - if (!i915_active_request_isset(active)) - atomic_inc(&ref->count); + RCU_INIT_POINTER(active->fence, NULL); + atomic_dec(&ref->count); } - GEM_BUG_ON(!atomic_read(&ref->count)); - __i915_active_request_set(active, rq); + if (!__i915_active_fence_set(active, fence)) + atomic_inc(&ref->count); out: i915_active_release(ref); return err; } -static void excl_cb(struct dma_fence *f, struct dma_fence_cb *cb) -{ - struct i915_active *ref = container_of(cb, typeof(*ref), excl_cb); - - RCU_INIT_POINTER(ref->excl, NULL); - dma_fence_put(f); - - active_retire(ref); -} - void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f) { /* We expect the caller to manage the exclusive timeline ordering */ GEM_BUG_ON(i915_active_is_idle(ref)); - dma_fence_get(f); - - rcu_read_lock(); - if (rcu_access_pointer(ref->excl)) { - struct dma_fence *old; - - old = dma_fence_get_rcu_safe(&ref->excl); - if (old) { - if (dma_fence_remove_callback(old, &ref->excl_cb)) - atomic_dec(&ref->count); - dma_fence_put(old); - } - } - rcu_read_unlock(); - - atomic_inc(&ref->count); - rcu_assign_pointer(ref->excl, f); + /* + * As we don't know which mutex the caller is using, we told a small + * lie to the debug code that it is using the i915_active.mutex; + * and now we must stick to that lie. + */ + mutex_acquire(&ref->mutex.dep_map, 0, 0, _THIS_IP_); + if (!__i915_active_fence_set(&ref->excl, f)) + atomic_inc(&ref->count); + mutex_release(&ref->mutex.dep_map, 0, _THIS_IP_); +} - if (dma_fence_add_callback(f, &ref->excl_cb, excl_cb)) { - RCU_INIT_POINTER(ref->excl, NULL); - atomic_dec(&ref->count); - dma_fence_put(f); - } +bool i915_active_acquire_if_busy(struct i915_active *ref) +{ + debug_active_assert(ref); + return atomic_add_unless(&ref->count, 1, 0); } int i915_active_acquire(struct i915_active *ref) { int err; - debug_active_assert(ref); - if (atomic_add_unless(&ref->count, 1, 0)) + if (i915_active_acquire_if_busy(ref)) return 0; err = mutex_lock_interruptible(&ref->mutex); @@ -438,121 +423,57 @@ void i915_active_release(struct i915_active *ref) active_retire(ref); } -static void __active_ungrab(struct i915_active *ref) -{ - clear_and_wake_up_bit(I915_ACTIVE_GRAB_BIT, &ref->flags); -} - -bool i915_active_trygrab(struct i915_active *ref) +static void enable_signaling(struct i915_active_fence *active) { - debug_active_assert(ref); - - if (test_and_set_bit(I915_ACTIVE_GRAB_BIT, &ref->flags)) - return false; - - if (!atomic_add_unless(&ref->count, 1, 0)) { - __active_ungrab(ref); - return false; - } + struct dma_fence *fence; - return true; -} - -void i915_active_ungrab(struct i915_active *ref) -{ - GEM_BUG_ON(!test_bit(I915_ACTIVE_GRAB_BIT, &ref->flags)); - - active_retire(ref); - __active_ungrab(ref); -} - -static int excl_wait(struct i915_active *ref) -{ - struct dma_fence *old; - int err = 0; - - if (!rcu_access_pointer(ref->excl)) - return 0; - - rcu_read_lock(); - old = dma_fence_get_rcu_safe(&ref->excl); - rcu_read_unlock(); - if (old) { - err = dma_fence_wait(old, true); - dma_fence_put(old); - } + fence = i915_active_fence_get(active); + if (!fence) + return; - return err; + dma_fence_enable_sw_signaling(fence); + dma_fence_put(fence); } int i915_active_wait(struct i915_active *ref) { struct active_node *it, *n; - int err; + int err = 0; might_sleep(); - might_lock(&ref->mutex); - - if (i915_active_is_idle(ref)) - return 0; - - err = mutex_lock_interruptible(&ref->mutex); - if (err) - return err; - if (!atomic_add_unless(&ref->count, 1, 0)) { - mutex_unlock(&ref->mutex); + if (!i915_active_acquire_if_busy(ref)) return 0; - } - - err = excl_wait(ref); - if (err) - goto out; + /* Flush lazy signals */ + enable_signaling(&ref->excl); rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { - if (is_barrier(&it->base)) { /* unconnected idle-barrier */ - err = -EBUSY; - break; - } + if (is_barrier(&it->base)) /* unconnected idle barrier */ + continue; - err = i915_active_request_retire(&it->base, BKL(ref)); - if (err) - break; + enable_signaling(&it->base); } + /* Any fence added after the wait begins will not be auto-signaled */ -out: - __active_retire(ref); + i915_active_release(ref); if (err) return err; - if (wait_on_bit(&ref->flags, I915_ACTIVE_GRAB_BIT, TASK_KILLABLE)) + if (wait_var_event_interruptible(ref, i915_active_is_idle(ref))) return -EINTR; - flush_work(&ref->work); - if (!i915_active_is_idle(ref)) - return -EBUSY; - return 0; } -int i915_request_await_active_request(struct i915_request *rq, - struct i915_active_request *active) -{ - struct i915_request *barrier = - i915_active_request_raw(active, &rq->i915->drm.struct_mutex); - - return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0; -} - int i915_request_await_active(struct i915_request *rq, struct i915_active *ref) { int err = 0; - if (rcu_access_pointer(ref->excl)) { + if (rcu_access_pointer(ref->excl.fence)) { struct dma_fence *fence; rcu_read_lock(); - fence = dma_fence_get_rcu_safe(&ref->excl); + fence = dma_fence_get_rcu_safe(&ref->excl.fence); rcu_read_unlock(); if (fence) { err = i915_request_await_dma_fence(rq, fence); @@ -578,7 +499,7 @@ void i915_active_fini(struct i915_active *ref) static inline bool is_idle_barrier(struct active_node *node, u64 idx) { - return node->timeline == idx && !i915_active_request_isset(&node->base); + return node->timeline == idx && !i915_active_fence_isset(&node->base); } static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx) @@ -698,13 +619,13 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref, node->base.lock = &engine->kernel_context->timeline->mutex; #endif - RCU_INIT_POINTER(node->base.request, NULL); - node->base.retire = node_retire; + RCU_INIT_POINTER(node->base.fence, NULL); + node->base.cb.func = node_retire; node->timeline = idx; node->ref = ref; } - if (!i915_active_request_isset(&node->base)) { + if (!i915_active_fence_isset(&node->base)) { /* * Mark this as being *our* unconnected proto-node. * @@ -714,8 +635,8 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref, * and then we can use the rb_node and list pointers * for our tracking of the pending barrier. */ - RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN)); - node->base.link.prev = (void *)engine; + RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN)); + node->base.cb.node.prev = (void *)engine; atomic_inc(&ref->count); } @@ -782,44 +703,113 @@ void i915_request_add_active_barriers(struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; struct llist_node *node, *next; + unsigned long flags; GEM_BUG_ON(intel_engine_is_virtual(engine)); GEM_BUG_ON(i915_request_timeline(rq) != engine->kernel_context->timeline); + node = llist_del_all(&engine->barrier_tasks); + if (!node) + return; /* * Attach the list of proto-fences to the in-flight request such * that the parent i915_active will be released when this request * is retired. */ - llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) { - RCU_INIT_POINTER(barrier_from_ll(node)->base.request, rq); + spin_lock_irqsave(&rq->lock, flags); + llist_for_each_safe(node, next, node) { + RCU_INIT_POINTER(barrier_from_ll(node)->base.fence, &rq->fence); smp_wmb(); /* serialise with reuse_idle_barrier */ - list_add_tail((struct list_head *)node, &rq->active_list); + list_add_tail((struct list_head *)node, &rq->fence.cb_list); + } + spin_unlock_irqrestore(&rq->lock, flags); +} + +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) +#define active_is_held(active) lockdep_is_held((active)->lock) +#else +#define active_is_held(active) true +#endif + +/* + * __i915_active_fence_set: Update the last active fence along its timeline + * @active: the active tracker + * @fence: the new fence (under construction) + * + * Records the new @fence as the last active fence along its timeline in + * this active tracker, moving the tracking callbacks from the previous + * fence onto this one. Returns the previous fence (if not already completed), + * which the caller must ensure is executed before the new fence. To ensure + * that the order of fences within the timeline of the i915_active_fence is + * maintained, it must be locked by the caller. + */ +struct dma_fence * +__i915_active_fence_set(struct i915_active_fence *active, + struct dma_fence *fence) +{ + struct dma_fence *prev; + unsigned long flags; + + /* NB: must be serialised by an outer timeline mutex (active->lock) */ + spin_lock_irqsave(fence->lock, flags); + GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)); + + prev = rcu_dereference_protected(active->fence, active_is_held(active)); + if (prev) { + GEM_BUG_ON(prev == fence); + spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING); + __list_del_entry(&active->cb.node); + spin_unlock(prev->lock); /* serialise with prev->cb_list */ + + /* + * active->fence is reset by the callback from inside + * interrupt context. We need to serialise our list + * manipulation with the fence->lock to prevent the prev + * being lost inside an interrupt (it can't be replaced as + * no other caller is allowed to enter __i915_active_fence_set + * as we hold the timeline lock). After serialising with + * the callback, we need to double check which ran first, + * our list_del() [decoupling prev from the callback] or + * the callback... + */ + prev = rcu_access_pointer(active->fence); } + + rcu_assign_pointer(active->fence, fence); + list_add_tail(&active->cb.node, &fence->cb_list); + + spin_unlock_irqrestore(fence->lock, flags); + + return prev; } -int i915_active_request_set(struct i915_active_request *active, - struct i915_request *rq) +int i915_active_fence_set(struct i915_active_fence *active, + struct i915_request *rq) { - int err; + struct dma_fence *fence; + int err = 0; #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) lockdep_assert_held(active->lock); #endif - /* Must maintain ordering wrt previous active requests */ - err = i915_request_await_active_request(rq, active); - if (err) - return err; + /* Must maintain timeline ordering wrt previous active requests */ + rcu_read_lock(); + fence = __i915_active_fence_set(active, &rq->fence); + if (fence) /* but the previous fence may not belong to that timeline! */ + fence = dma_fence_get_rcu(fence); + rcu_read_unlock(); + if (fence) { + err = i915_request_await_dma_fence(rq, fence); + dma_fence_put(fence); + } - __i915_active_request_set(active, rq); - return 0; + return err; } -void i915_active_retire_noop(struct i915_active_request *active, - struct i915_request *request) +void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb) { - /* Space left intentionally blank */ + i915_active_fence_cb(fence, cb); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h index 90034f61b7c25..4f52fe6146d20 100644 --- a/drivers/gpu/drm/i915/i915_active.h +++ b/drivers/gpu/drm/i915/i915_active.h @@ -12,6 +12,10 @@ #include "i915_active_types.h" #include "i915_request.h" +struct i915_request; +struct intel_engine_cs; +struct intel_timeline; + /* * We treat requests as fences. This is not be to confused with our * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync. @@ -28,308 +32,108 @@ * write access so that we can perform concurrent read operations between * the CPU and GPU engines, as well as waiting for all rendering to * complete, or waiting for the last GPU user of a "fence register". The - * object then embeds a #i915_active_request to track the most recent (in + * object then embeds a #i915_active_fence to track the most recent (in * retirement order) request relevant for the desired mode of access. - * The #i915_active_request is updated with i915_active_request_set() to + * The #i915_active_fence is updated with i915_active_fence_set() to * track the most recent fence request, typically this is done as part of * i915_vma_move_to_active(). * - * When the #i915_active_request completes (is retired), it will + * When the #i915_active_fence completes (is retired), it will * signal its completion to the owner through a callback as well as mark - * itself as idle (i915_active_request.request == NULL). The owner + * itself as idle (i915_active_fence.request == NULL). The owner * can then perform any action, such as delayed freeing of an active * resource including itself. */ -void i915_active_retire_noop(struct i915_active_request *active, - struct i915_request *request); +void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb); /** - * i915_active_request_init - prepares the activity tracker for use + * __i915_active_fence_init - prepares the activity tracker for use * @active - the active tracker - * @rq - initial request to track, can be NULL + * @fence - initial fence to track, can be NULL * @func - a callback when then the tracker is retired (becomes idle), * can be NULL * - * i915_active_request_init() prepares the embedded @active struct for use as - * an activity tracker, that is for tracking the last known active request - * associated with it. When the last request becomes idle, when it is retired + * i915_active_fence_init() prepares the embedded @active struct for use as + * an activity tracker, that is for tracking the last known active fence + * associated with it. When the last fence becomes idle, when it is retired * after completion, the optional callback @func is invoked. */ static inline void -i915_active_request_init(struct i915_active_request *active, +__i915_active_fence_init(struct i915_active_fence *active, struct mutex *lock, - struct i915_request *rq, - i915_active_retire_fn retire) + void *fence, + dma_fence_func_t fn) { - RCU_INIT_POINTER(active->request, rq); - INIT_LIST_HEAD(&active->link); - active->retire = retire ?: i915_active_retire_noop; + RCU_INIT_POINTER(active->fence, fence); + active->cb.func = fn ?: i915_active_noop; #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) active->lock = lock; #endif } -#define INIT_ACTIVE_REQUEST(name, lock) \ - i915_active_request_init((name), (lock), NULL, NULL) - -/** - * i915_active_request_set - updates the tracker to watch the current request - * @active - the active tracker - * @request - the request to watch - * - * __i915_active_request_set() watches the given @request for completion. Whilst - * that @request is busy, the @active reports busy. When that @request is - * retired, the @active tracker is updated to report idle. - */ -static inline void -__i915_active_request_set(struct i915_active_request *active, - struct i915_request *request) -{ -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) - lockdep_assert_held(active->lock); -#endif - list_move(&active->link, &request->active_list); - rcu_assign_pointer(active->request, request); -} +#define INIT_ACTIVE_FENCE(A, LOCK) \ + __i915_active_fence_init((A), (LOCK), NULL, NULL) -int __must_check -i915_active_request_set(struct i915_active_request *active, - struct i915_request *rq); +struct dma_fence * +__i915_active_fence_set(struct i915_active_fence *active, + struct dma_fence *fence); /** - * i915_active_request_raw - return the active request + * i915_active_fence_set - updates the tracker to watch the current fence * @active - the active tracker + * @rq - the request to watch * - * i915_active_request_raw() returns the current request being tracked, or NULL. - * It does not obtain a reference on the request for the caller, so the caller - * must hold struct_mutex. + * i915_active_fence_set() watches the given @rq for completion. While + * that @rq is busy, the @active reports busy. When that @rq is signaled + * (or else retired) the @active tracker is updated to report idle. */ -static inline struct i915_request * -i915_active_request_raw(const struct i915_active_request *active, - struct mutex *mutex) -{ - return rcu_dereference_protected(active->request, - lockdep_is_held(mutex)); -} - -/** - * i915_active_request_peek - report the active request being monitored - * @active - the active tracker - * - * i915_active_request_peek() returns the current request being tracked if - * still active, or NULL. It does not obtain a reference on the request - * for the caller, so the caller must hold struct_mutex. - */ -static inline struct i915_request * -i915_active_request_peek(const struct i915_active_request *active, - struct mutex *mutex) -{ - struct i915_request *request; - - request = i915_active_request_raw(active, mutex); - if (!request || i915_request_completed(request)) - return NULL; - - return request; -} - -/** - * i915_active_request_get - return a reference to the active request - * @active - the active tracker - * - * i915_active_request_get() returns a reference to the active request, or NULL - * if the active tracker is idle. The caller must hold struct_mutex. - */ -static inline struct i915_request * -i915_active_request_get(const struct i915_active_request *active, - struct mutex *mutex) -{ - return i915_request_get(i915_active_request_peek(active, mutex)); -} - -/** - * __i915_active_request_get_rcu - return a reference to the active request - * @active - the active tracker - * - * __i915_active_request_get() returns a reference to the active request, - * or NULL if the active tracker is idle. The caller must hold the RCU read - * lock, but the returned pointer is safe to use outside of RCU. - */ -static inline struct i915_request * -__i915_active_request_get_rcu(const struct i915_active_request *active) -{ - /* - * Performing a lockless retrieval of the active request is super - * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing - * slab of request objects will not be freed whilst we hold the - * RCU read lock. It does not guarantee that the request itself - * will not be freed and then *reused*. Viz, - * - * Thread A Thread B - * - * rq = active.request - * retire(rq) -> free(rq); - * (rq is now first on the slab freelist) - * active.request = NULL - * - * rq = new submission on a new object - * ref(rq) - * - * To prevent the request from being reused whilst the caller - * uses it, we take a reference like normal. Whilst acquiring - * the reference we check that it is not in a destroyed state - * (refcnt == 0). That prevents the request being reallocated - * whilst the caller holds on to it. To check that the request - * was not reallocated as we acquired the reference we have to - * check that our request remains the active request across - * the lookup, in the same manner as a seqlock. The visibility - * of the pointer versus the reference counting is controlled - * by using RCU barriers (rcu_dereference and rcu_assign_pointer). - * - * In the middle of all that, we inspect whether the request is - * complete. Retiring is lazy so the request may be completed long - * before the active tracker is updated. Querying whether the - * request is complete is far cheaper (as it involves no locked - * instructions setting cachelines to exclusive) than acquiring - * the reference, so we do it first. The RCU read lock ensures the - * pointer dereference is valid, but does not ensure that the - * seqno nor HWS is the right one! However, if the request was - * reallocated, that means the active tracker's request was complete. - * If the new request is also complete, then both are and we can - * just report the active tracker is idle. If the new request is - * incomplete, then we acquire a reference on it and check that - * it remained the active request. - * - * It is then imperative that we do not zero the request on - * reallocation, so that we can chase the dangling pointers! - * See i915_request_alloc(). - */ - do { - struct i915_request *request; - - request = rcu_dereference(active->request); - if (!request || i915_request_completed(request)) - return NULL; - - /* - * An especially silly compiler could decide to recompute the - * result of i915_request_completed, more specifically - * re-emit the load for request->fence.seqno. A race would catch - * a later seqno value, which could flip the result from true to - * false. Which means part of the instructions below might not - * be executed, while later on instructions are executed. Due to - * barriers within the refcounting the inconsistency can't reach - * past the call to i915_request_get_rcu, but not executing - * that while still executing i915_request_put() creates - * havoc enough. Prevent this with a compiler barrier. - */ - barrier(); - - request = i915_request_get_rcu(request); - - /* - * What stops the following rcu_access_pointer() from occurring - * before the above i915_request_get_rcu()? If we were - * to read the value before pausing to get the reference to - * the request, we may not notice a change in the active - * tracker. - * - * The rcu_access_pointer() is a mere compiler barrier, which - * means both the CPU and compiler are free to perform the - * memory read without constraint. The compiler only has to - * ensure that any operations after the rcu_access_pointer() - * occur afterwards in program order. This means the read may - * be performed earlier by an out-of-order CPU, or adventurous - * compiler. - * - * The atomic operation at the heart of - * i915_request_get_rcu(), see dma_fence_get_rcu(), is - * atomic_inc_not_zero() which is only a full memory barrier - * when successful. That is, if i915_request_get_rcu() - * returns the request (and so with the reference counted - * incremented) then the following read for rcu_access_pointer() - * must occur after the atomic operation and so confirm - * that this request is the one currently being tracked. - * - * The corresponding write barrier is part of - * rcu_assign_pointer(). - */ - if (!request || request == rcu_access_pointer(active->request)) - return rcu_pointer_handoff(request); - - i915_request_put(request); - } while (1); -} - +int __must_check +i915_active_fence_set(struct i915_active_fence *active, + struct i915_request *rq); /** - * i915_active_request_get_unlocked - return a reference to the active request + * i915_active_fence_get - return a reference to the active fence * @active - the active tracker * - * i915_active_request_get_unlocked() returns a reference to the active request, + * i915_active_fence_get() returns a reference to the active fence, * or NULL if the active tracker is idle. The reference is obtained under RCU, * so no locking is required by the caller. * - * The reference should be freed with i915_request_put(). + * The reference should be freed with dma_fence_put(). */ -static inline struct i915_request * -i915_active_request_get_unlocked(const struct i915_active_request *active) +static inline struct dma_fence * +i915_active_fence_get(struct i915_active_fence *active) { - struct i915_request *request; + struct dma_fence *fence; rcu_read_lock(); - request = __i915_active_request_get_rcu(active); + fence = dma_fence_get_rcu_safe(&active->fence); rcu_read_unlock(); - return request; + return fence; } /** - * i915_active_request_isset - report whether the active tracker is assigned + * i915_active_fence_isset - report whether the active tracker is assigned * @active - the active tracker * - * i915_active_request_isset() returns true if the active tracker is currently - * assigned to a request. Due to the lazy retiring, that request may be idle + * i915_active_fence_isset() returns true if the active tracker is currently + * assigned to a fence. Due to the lazy retiring, that fence may be idle * and this may report stale information. */ static inline bool -i915_active_request_isset(const struct i915_active_request *active) +i915_active_fence_isset(const struct i915_active_fence *active) { - return rcu_access_pointer(active->request); + return rcu_access_pointer(active->fence); } -/** - * i915_active_request_retire - waits until the request is retired - * @active - the active request on which to wait - * - * i915_active_request_retire() waits until the request is completed, - * and then ensures that at least the retirement handler for this - * @active tracker is called before returning. If the @active - * tracker is idle, the function returns immediately. - */ -static inline int __must_check -i915_active_request_retire(struct i915_active_request *active, - struct mutex *mutex) +static inline void +i915_active_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) { - struct i915_request *request; - long ret; - - request = i915_active_request_raw(active, mutex); - if (!request) - return 0; - - ret = i915_request_wait(request, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT); - if (ret < 0) - return ret; + struct i915_active_fence *active = + container_of(cb, typeof(*active), cb); - list_del_init(&active->link); - RCU_INIT_POINTER(active->request, NULL); - - active->retire(active, request); - - return 0; + RCU_INIT_POINTER(active->fence, NULL); } /* @@ -358,47 +162,40 @@ i915_active_request_retire(struct i915_active_request *active, * synchronisation. */ -void __i915_active_init(struct drm_i915_private *i915, - struct i915_active *ref, +void __i915_active_init(struct i915_active *ref, int (*active)(struct i915_active *ref), void (*retire)(struct i915_active *ref), struct lock_class_key *key); -#define i915_active_init(i915, ref, active, retire) do { \ +#define i915_active_init(ref, active, retire) do { \ static struct lock_class_key __key; \ \ - __i915_active_init(i915, ref, active, retire, &__key); \ + __i915_active_init(ref, active, retire, &__key); \ } while (0) int i915_active_ref(struct i915_active *ref, struct intel_timeline *tl, - struct i915_request *rq); + struct dma_fence *fence); static inline int i915_active_add_request(struct i915_active *ref, struct i915_request *rq) { - return i915_active_ref(ref, i915_request_timeline(rq), rq); + return i915_active_ref(ref, i915_request_timeline(rq), &rq->fence); } void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f); static inline bool i915_active_has_exclusive(struct i915_active *ref) { - return rcu_access_pointer(ref->excl); + return rcu_access_pointer(ref->excl.fence); } int i915_active_wait(struct i915_active *ref); -int i915_request_await_active(struct i915_request *rq, - struct i915_active *ref); -int i915_request_await_active_request(struct i915_request *rq, - struct i915_active_request *active); +int i915_request_await_active(struct i915_request *rq, struct i915_active *ref); int i915_active_acquire(struct i915_active *ref); +bool i915_active_acquire_if_busy(struct i915_active *ref); void i915_active_release(struct i915_active *ref); -void __i915_active_release_nested(struct i915_active *ref, int subclass); - -bool i915_active_trygrab(struct i915_active *ref); -void i915_active_ungrab(struct i915_active *ref); static inline bool i915_active_is_idle(const struct i915_active *ref) diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h index 021167f0004d6..d89a74c142c66 100644 --- a/drivers/gpu/drm/i915/i915_active_types.h +++ b/drivers/gpu/drm/i915/i915_active_types.h @@ -17,17 +17,9 @@ #include "i915_utils.h" -struct drm_i915_private; -struct i915_active_request; -struct i915_request; - -typedef void (*i915_active_retire_fn)(struct i915_active_request *, - struct i915_request *); - -struct i915_active_request { - struct i915_request __rcu *request; - struct list_head link; - i915_active_retire_fn retire; +struct i915_active_fence { + struct dma_fence __rcu *fence; + struct dma_fence_cb cb; #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) /* * Incorporeal! @@ -53,20 +45,17 @@ struct active_node; #define i915_active_may_sleep(fn) ptr_pack_bits(&(fn), I915_ACTIVE_MAY_SLEEP, 2) struct i915_active { - struct drm_i915_private *i915; + atomic_t count; + struct mutex mutex; struct active_node *cache; struct rb_root tree; - struct mutex mutex; - atomic_t count; /* Preallocated "exclusive" node */ - struct dma_fence __rcu *excl; - struct dma_fence_cb excl_cb; + struct i915_active_fence excl; unsigned long flags; #define I915_ACTIVE_RETIRE_SLEEPS BIT(0) -#define I915_ACTIVE_GRAB_BIT 1 int (*active)(struct i915_active *ref); void (*retire)(struct i915_active *ref); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index fc5c618f6c197..b0aa0a7c680f8 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -892,28 +892,38 @@ wait_for_timelines(struct intel_gt *gt, unsigned int wait, long timeout) spin_lock_irqsave(&timelines->lock, flags); list_for_each_entry(tl, &timelines->active_list, link) { - struct i915_request *rq; + struct dma_fence *fence; - rq = i915_active_request_get_unlocked(&tl->last_request); - if (!rq) + fence = i915_active_fence_get(&tl->last_request); + if (!fence) continue; spin_unlock_irqrestore(&timelines->lock, flags); - /* - * "Race-to-idle". - * - * Switching to the kernel context is often used a synchronous - * step prior to idling, e.g. in suspend for flushing all - * current operations to memory before sleeping. These we - * want to complete as quickly as possible to avoid prolonged - * stalls, so allow the gpu to boost to maximum clocks. - */ - if (wait & I915_WAIT_FOR_IDLE_BOOST) - gen6_rps_boost(rq); + if (!dma_fence_is_i915(fence)) { + timeout = dma_fence_wait_timeout(fence, + flags & I915_WAIT_INTERRUPTIBLE, + timeout); + } else { + struct i915_request *rq = to_request(fence); + + /* + * "Race-to-idle". + * + * Switching to the kernel context is often used as + * a synchronous step prior to idling, e.g. in suspend + * for flushing all current operations to memory before + * sleeping. These we want to complete as quickly as + * possible to avoid prolonged stalls, so allow the gpu + * to boost to maximum clocks. + */ + if (flags & I915_WAIT_FOR_IDLE_BOOST) + gen6_rps_boost(rq); + + timeout = i915_request_wait(rq, flags, timeout); + } - timeout = i915_request_wait(rq, wait, timeout); - i915_request_put(rq); + dma_fence_put(fence); if (timeout < 0) return timeout; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 55cebf256d033..7462d87f7a48c 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1861,7 +1861,6 @@ static const struct i915_vma_ops pd_vma_ops = { static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size) { - struct drm_i915_private *i915 = ppgtt->base.vm.i915; struct i915_ggtt *ggtt = ppgtt->base.vm.gt->ggtt; struct i915_vma *vma; @@ -1872,7 +1871,7 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size) if (!vma) return ERR_PTR(-ENOMEM); - i915_active_init(i915, &vma->active, NULL, NULL); + i915_active_init(&vma->active, NULL, NULL); mutex_init(&vma->pages_mutex); vma->vm = i915_vm_get(&ggtt->vm); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 6384a06aa5bfb..a28ee754b7b48 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1299,7 +1299,7 @@ capture_vma(struct capture_vma *next, if (!c) return next; - if (!i915_active_trygrab(&vma->active)) { + if (!i915_active_acquire_if_busy(&vma->active)) { kfree(c); return next; } @@ -1439,7 +1439,7 @@ gem_record_rings(struct i915_gpu_state *error, struct compress *compress) *this->slot = i915_error_object_create(i915, vma, compress); - i915_active_ungrab(&vma->active); + i915_active_release(&vma->active); i915_vma_put(vma); capture = this->next; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index a8916412759b3..4ffe62a421864 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -218,8 +218,6 @@ static void remove_from_engine(struct i915_request *rq) static bool i915_request_retire(struct i915_request *rq) { - struct i915_active_request *active, *next; - if (!i915_request_completed(rq)) return false; @@ -244,35 +242,6 @@ static bool i915_request_retire(struct i915_request *rq) &i915_request_timeline(rq)->requests)); rq->ring->head = rq->postfix; - /* - * Walk through the active list, calling retire on each. This allows - * objects to track their GPU activity and mark themselves as idle - * when their *last* active request is completed (updating state - * tracking lists for eviction, active references for GEM, etc). - * - * As the ->retire() may free the node, we decouple it first and - * pass along the auxiliary information (to avoid dereferencing - * the node after the callback). - */ - list_for_each_entry_safe(active, next, &rq->active_list, link) { - /* - * In microbenchmarks or focusing upon time inside the kernel, - * we may spend an inordinate amount of time simply handling - * the retirement of requests and processing their callbacks. - * Of which, this loop itself is particularly hot due to the - * cache misses when jumping around the list of - * i915_active_request. So we try to keep this loop as - * streamlined as possible and also prefetch the next - * i915_active_request to try and hide the likely cache miss. - */ - prefetchw(next); - - INIT_LIST_HEAD(&active->link); - RCU_INIT_POINTER(active->request, NULL); - - active->retire(active, rq); - } - local_irq_disable(); /* @@ -704,7 +673,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) rq->flags = 0; rq->execution_mask = ALL_ENGINES; - INIT_LIST_HEAD(&rq->active_list); INIT_LIST_HEAD(&rq->execute_cb); /* @@ -743,7 +711,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) ce->ring->emit = rq->head; /* Make sure we didn't add ourselves to external state before freeing */ - GEM_BUG_ON(!list_empty(&rq->active_list)); GEM_BUG_ON(!list_empty(&rq->sched.signalers_list)); GEM_BUG_ON(!list_empty(&rq->sched.waiters_list)); @@ -1174,8 +1141,8 @@ __i915_request_add_to_timeline(struct i915_request *rq) * precludes optimising to use semaphores serialisation of a single * timeline across engines. */ - prev = rcu_dereference_protected(timeline->last_request.request, - lockdep_is_held(&timeline->mutex)); + prev = to_request(__i915_active_fence_set(&timeline->last_request, + &rq->fence)); if (prev && !i915_request_completed(prev)) { if (is_power_of_2(prev->engine->mask | rq->engine->mask)) i915_sw_fence_await_sw_fence(&rq->submit, @@ -1200,7 +1167,6 @@ __i915_request_add_to_timeline(struct i915_request *rq) * us, the timeline will hold its seqno which is later than ours. */ GEM_BUG_ON(timeline->seqno != rq->fence.seqno); - __i915_active_request_set(&timeline->last_request, rq); return prev; } diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index ec5bb4c2e5aeb..91a885c36c6b6 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -211,7 +211,6 @@ struct i915_request { * on the active_list (of their final request). */ struct i915_capture_list *capture_list; - struct list_head active_list; /** Time at which this request was emitted, in jiffies. */ unsigned long emitted_jiffies; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index e191247c7c5f6..9fdcd4e2c799f 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -120,8 +120,7 @@ vma_create(struct drm_i915_gem_object *obj, vma->size = obj->base.size; vma->display_alignment = I915_GTT_MIN_ALIGNMENT; - i915_active_init(vm->i915, &vma->active, - __i915_vma_active, __i915_vma_retire); + i915_active_init(&vma->active, __i915_vma_active, __i915_vma_retire); /* Declare ourselves safe for use inside shrinkers */ if (IS_ENABLED(CONFIG_LOCKDEP)) { @@ -1148,6 +1147,7 @@ int __i915_vma_unbind(struct i915_vma *vma) if (ret) return ret; + GEM_BUG_ON(i915_vma_is_active(vma)); if (i915_vma_is_pinned(vma)) { vma_print_allocator(vma, "is pinned"); return -EBUSY; diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c index a41785822ed9f..2cc71bcf884fe 100644 --- a/drivers/gpu/drm/i915/selftests/i915_active.c +++ b/drivers/gpu/drm/i915/selftests/i915_active.c @@ -68,7 +68,7 @@ static struct live_active *__live_alloc(struct drm_i915_private *i915) return NULL; kref_init(&active->ref); - i915_active_init(i915, &active->base, __live_active, __live_retire); + i915_active_init(&active->base, __live_active, __live_retire); return active; } @@ -146,19 +146,13 @@ static int live_active_wait(void *arg) { struct drm_i915_private *i915 = arg; struct live_active *active; - intel_wakeref_t wakeref; int err = 0; /* Check that we get a callback when requests retire upon waiting */ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - active = __live_active_setup(i915); - if (IS_ERR(active)) { - err = PTR_ERR(active); - goto err; - } + if (IS_ERR(active)) + return PTR_ERR(active); i915_active_wait(&active->base); if (!READ_ONCE(active->retired)) { @@ -168,11 +162,9 @@ static int live_active_wait(void *arg) __live_put(active); + mutex_lock(&i915->drm.struct_mutex); if (igt_flush_test(i915, I915_WAIT_LOCKED)) err = -EIO; - -err: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); mutex_unlock(&i915->drm.struct_mutex); return err; @@ -182,23 +174,19 @@ static int live_active_retire(void *arg) { struct drm_i915_private *i915 = arg; struct live_active *active; - intel_wakeref_t wakeref; int err = 0; /* Check that we get a callback when requests are indirectly retired */ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - active = __live_active_setup(i915); - if (IS_ERR(active)) { - err = PTR_ERR(active); - goto err; - } + if (IS_ERR(active)) + return PTR_ERR(active); /* waits for & retires all requests */ + mutex_lock(&i915->drm.struct_mutex); if (igt_flush_test(i915, I915_WAIT_LOCKED)) err = -EIO; + mutex_unlock(&i915->drm.struct_mutex); if (!READ_ONCE(active->retired)) { pr_err("i915_active not retired after flushing!\n"); @@ -207,10 +195,6 @@ static int live_active_retire(void *arg) __live_put(active); -err: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - return err; } From b72348406927740d4cfb55f2c1e19c6769ffc666 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 4 Oct 2019 14:40:01 +0100 Subject: [PATCH 311/331] drm/i915: Move idle barrier cleanup into engine-pm Now that we now longer need to guarantee that the active callback is under the struct_mutex, we can lift it out of the i915_gem_park() and into the engine parking itself. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-7-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_pm.c | 19 ------------------- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 15 +++++++++++++++ drivers/gpu/drm/i915/i915_active.c | 1 + 3 files changed, 16 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 0a4115c6c2757..5180b2ee1cb72 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -11,29 +11,10 @@ #include "i915_drv.h" #include "i915_globals.h" -static void call_idle_barriers(struct intel_engine_cs *engine) -{ - struct llist_node *node, *next; - - llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) { - struct dma_fence_cb *cb = - container_of((struct list_head *)node, - typeof(*cb), node); - - cb->func(NULL, cb); - } -} - static void i915_gem_park(struct drm_i915_private *i915) { - struct intel_engine_cs *engine; - enum intel_engine_id id; - lockdep_assert_held(&i915->drm.struct_mutex); - for_each_engine(engine, i915, id) - call_idle_barriers(engine); /* cleanup after wedging */ - i915_vma_parked(i915); i915_globals_park(); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 66fc49b76ea8d..8e5e513eddc9f 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -124,6 +124,19 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) return result; } +static void call_idle_barriers(struct intel_engine_cs *engine) +{ + struct llist_node *node, *next; + + llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) { + struct dma_fence_cb *cb = + container_of((struct list_head *)node, + typeof(*cb), node); + + cb->func(NULL, cb); + } +} + static int __engine_park(struct intel_wakeref *wf) { struct intel_engine_cs *engine = @@ -143,6 +156,8 @@ static int __engine_park(struct intel_wakeref *wf) GEM_TRACE("%s\n", engine->name); + call_idle_barriers(engine); /* cleanup after wedging */ + intel_engine_disarm_breadcrumbs(engine); intel_engine_pool_park(&engine->pool); diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 023652ded4be3..aa37c07004b96 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -693,6 +693,7 @@ void i915_active_acquire_barrier(struct i915_active *ref) rb_link_node(&node->node, parent, p); rb_insert_color(&node->node, &ref->tree); + GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); llist_add(barrier_to_ll(node), &engine->barrier_tasks); intel_engine_pm_put(engine); } From 7e8057626640cfedbae000c5032be32269713687 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 4 Oct 2019 14:40:02 +0100 Subject: [PATCH 312/331] drm/i915: Drop struct_mutex from around i915_retire_requests() We don't need to hold struct_mutex now for retiring requests, so drop it from i915_retire_requests() and i915_gem_wait_for_idle(), finally removing I915_WAIT_LOCKED for good. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-8-chris@chris-wilson.co.uk --- .../gpu/drm/i915/gem/i915_gem_client_blt.c | 7 +- drivers/gpu/drm/i915/gem/i915_gem_context.c | 20 +-- drivers/gpu/drm/i915/gem/i915_gem_pm.c | 45 ++---- .../i915/gem/selftests/i915_gem_coherency.c | 40 +++-- .../drm/i915/gem/selftests/i915_gem_context.c | 18 +-- .../drm/i915/gem/selftests/i915_gem_mman.c | 6 +- .../i915/gem/selftests/i915_gem_object_blt.c | 4 - drivers/gpu/drm/i915/gt/intel_gt_pm.c | 28 +--- drivers/gpu/drm/i915/gt/selftest_context.c | 4 +- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 89 ++--------- drivers/gpu/drm/i915/gt/selftest_lrc.c | 23 ++- drivers/gpu/drm/i915/gt/selftest_timeline.c | 91 +++++------ .../gpu/drm/i915/gt/selftest_workarounds.c | 6 +- drivers/gpu/drm/i915/i915_debugfs.c | 42 ++--- drivers/gpu/drm/i915/i915_gem.c | 19 +-- drivers/gpu/drm/i915/i915_request.h | 7 +- drivers/gpu/drm/i915/selftests/i915_active.c | 8 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 2 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 6 +- drivers/gpu/drm/i915/selftests/i915_request.c | 151 +++++------------- .../gpu/drm/i915/selftests/i915_selftest.c | 8 +- drivers/gpu/drm/i915/selftests/i915_vma.c | 4 - .../gpu/drm/i915/selftests/igt_flush_test.c | 30 ++-- .../gpu/drm/i915/selftests/igt_flush_test.h | 2 +- .../gpu/drm/i915/selftests/igt_live_test.c | 9 +- .../gpu/drm/i915/selftests/mock_gem_device.c | 4 - 26 files changed, 213 insertions(+), 460 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c index c1fca5728e6ec..81366aa4812b8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c @@ -155,7 +155,6 @@ static void clear_pages_dma_fence_cb(struct dma_fence *fence, static void clear_pages_worker(struct work_struct *work) { struct clear_pages_work *w = container_of(work, typeof(*w), work); - struct drm_i915_private *i915 = w->ce->engine->i915; struct drm_i915_gem_object *obj = w->sleeve->vma->obj; struct i915_vma *vma = w->sleeve->vma; struct i915_request *rq; @@ -173,11 +172,9 @@ static void clear_pages_worker(struct work_struct *work) obj->read_domains = I915_GEM_GPU_DOMAINS; obj->write_domain = 0; - /* XXX: we need to kill this */ - mutex_lock(&i915->drm.struct_mutex); err = i915_vma_pin(vma, 0, 0, PIN_USER); if (unlikely(err)) - goto out_unlock; + goto out_signal; batch = intel_emit_vma_fill_blt(w->ce, vma, w->value); if (IS_ERR(batch)) { @@ -229,8 +226,6 @@ static void clear_pages_worker(struct work_struct *work) intel_emit_vma_release(w->ce, batch); out_unpin: i915_vma_unpin(vma); -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); out_signal: if (unlikely(err)) { dma_fence_set_error(&w->dma, err); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 9d85aab68d34e..0ab416887fc2a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1159,8 +1159,7 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu) } static int -__intel_context_reconfigure_sseu(struct intel_context *ce, - struct intel_sseu sseu) +intel_context_reconfigure_sseu(struct intel_context *ce, struct intel_sseu sseu) { int ret; @@ -1183,23 +1182,6 @@ __intel_context_reconfigure_sseu(struct intel_context *ce, return ret; } -static int -intel_context_reconfigure_sseu(struct intel_context *ce, struct intel_sseu sseu) -{ - struct drm_i915_private *i915 = ce->engine->i915; - int ret; - - ret = mutex_lock_interruptible(&i915->drm.struct_mutex); - if (ret) - return ret; - - ret = __intel_context_reconfigure_sseu(ce, sseu); - - mutex_unlock(&i915->drm.struct_mutex); - - return ret; -} - static int user_to_context_sseu(struct drm_i915_private *i915, const struct drm_i915_gem_context_param_sseu *user, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 5180b2ee1cb72..2ddc3aeaac9d4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -48,11 +48,7 @@ static void retire_work_handler(struct work_struct *work) struct drm_i915_private *i915 = container_of(work, typeof(*i915), gem.retire_work.work); - /* Come back later if the device is busy... */ - if (mutex_trylock(&i915->drm.struct_mutex)) { - i915_retire_requests(i915); - mutex_unlock(&i915->drm.struct_mutex); - } + i915_retire_requests(i915); queue_delayed_work(i915->wq, &i915->gem.retire_work, @@ -86,26 +82,23 @@ static bool switch_to_kernel_context_sync(struct intel_gt *gt) { bool result = !intel_gt_is_wedged(gt); - do { - if (i915_gem_wait_for_idle(gt->i915, - I915_WAIT_LOCKED | - I915_WAIT_FOR_IDLE_BOOST, - I915_GEM_IDLE_TIMEOUT) == -ETIME) { - /* XXX hide warning from gem_eio */ - if (i915_modparams.reset) { - dev_err(gt->i915->drm.dev, - "Failed to idle engines, declaring wedged!\n"); - GEM_TRACE_DUMP(); - } - - /* - * Forcibly cancel outstanding work and leave - * the gpu quiet. - */ - intel_gt_set_wedged(gt); - result = false; + if (i915_gem_wait_for_idle(gt->i915, + I915_WAIT_FOR_IDLE_BOOST, + I915_GEM_IDLE_TIMEOUT) == -ETIME) { + /* XXX hide warning from gem_eio */ + if (i915_modparams.reset) { + dev_err(gt->i915->drm.dev, + "Failed to idle engines, declaring wedged!\n"); + GEM_TRACE_DUMP(); } - } while (i915_retire_requests(gt->i915) && result); + + /* + * Forcibly cancel outstanding work and leave + * the gpu quiet. + */ + intel_gt_set_wedged(gt); + result = false; + } if (intel_gt_pm_wait_for_idle(gt)) result = false; @@ -145,8 +138,6 @@ void i915_gem_suspend(struct drm_i915_private *i915) user_forcewake(&i915->gt, true); - mutex_lock(&i915->drm.struct_mutex); - /* * We have to flush all the executing contexts to main memory so * that they can saved in the hibernation image. To ensure the last @@ -158,8 +149,6 @@ void i915_gem_suspend(struct drm_i915_private *i915) */ switch_to_kernel_context_sync(&i915->gt); - mutex_unlock(&i915->drm.struct_mutex); - cancel_delayed_work_sync(&i915->gt.hangcheck.work); i915_gem_drain_freed_objects(i915); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c index 0ff7a89aadca1..549810f70aeb6 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c @@ -7,6 +7,7 @@ #include #include "gt/intel_gt.h" +#include "gt/intel_gt_pm.h" #include "i915_selftest.h" #include "selftests/i915_random.h" @@ -78,7 +79,7 @@ static int gtt_set(struct drm_i915_gem_object *obj, { struct i915_vma *vma; u32 __iomem *map; - int err; + int err = 0; i915_gem_object_lock(obj); err = i915_gem_object_set_to_gtt_domain(obj, true); @@ -90,15 +91,21 @@ static int gtt_set(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) return PTR_ERR(vma); + intel_gt_pm_get(vma->vm->gt); + map = i915_vma_pin_iomap(vma); i915_vma_unpin(vma); - if (IS_ERR(map)) - return PTR_ERR(map); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto out_rpm; + } iowrite32(v, &map[offset / sizeof(*map)]); i915_vma_unpin_iomap(vma); - return 0; +out_rpm: + intel_gt_pm_put(vma->vm->gt); + return err; } static int gtt_get(struct drm_i915_gem_object *obj, @@ -107,7 +114,7 @@ static int gtt_get(struct drm_i915_gem_object *obj, { struct i915_vma *vma; u32 __iomem *map; - int err; + int err = 0; i915_gem_object_lock(obj); err = i915_gem_object_set_to_gtt_domain(obj, false); @@ -119,15 +126,21 @@ static int gtt_get(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) return PTR_ERR(vma); + intel_gt_pm_get(vma->vm->gt); + map = i915_vma_pin_iomap(vma); i915_vma_unpin(vma); - if (IS_ERR(map)) - return PTR_ERR(map); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto out_rpm; + } *v = ioread32(&map[offset / sizeof(*map)]); i915_vma_unpin_iomap(vma); - return 0; +out_rpm: + intel_gt_pm_put(vma->vm->gt); + return err; } static int wc_set(struct drm_i915_gem_object *obj, @@ -280,7 +293,6 @@ static int igt_gem_coherency(void *arg) struct drm_i915_private *i915 = arg; const struct igt_coherency_mode *read, *write, *over; struct drm_i915_gem_object *obj; - intel_wakeref_t wakeref; unsigned long count, n; u32 *offsets, *values; int err = 0; @@ -299,8 +311,6 @@ static int igt_gem_coherency(void *arg) values = offsets + ncachelines; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); for (over = igt_coherency_mode; over->name; over++) { if (!over->set) continue; @@ -326,7 +336,7 @@ static int igt_gem_coherency(void *arg) obj = i915_gem_object_create_internal(i915, PAGE_SIZE); if (IS_ERR(obj)) { err = PTR_ERR(obj); - goto unlock; + goto free; } i915_random_reorder(offsets, ncachelines, &prng); @@ -377,15 +387,13 @@ static int igt_gem_coherency(void *arg) } } } -unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); +free: kfree(offsets); return err; put_object: i915_gem_object_put(obj); - goto unlock; + goto free; } int i915_gem_coherency_live_selftests(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 8eba0d3a31def..f5402aad9b5a0 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -164,7 +164,6 @@ struct parallel_switch { static int __live_parallel_switch1(void *data) { struct parallel_switch *arg = data; - struct drm_i915_private *i915 = arg->ce[0]->engine->i915; IGT_TIMEOUT(end_time); unsigned long count; @@ -176,16 +175,12 @@ static int __live_parallel_switch1(void *data) for (n = 0; n < ARRAY_SIZE(arg->ce); n++) { i915_request_put(rq); - mutex_lock(&i915->drm.struct_mutex); rq = i915_request_create(arg->ce[n]); - if (IS_ERR(rq)) { - mutex_unlock(&i915->drm.struct_mutex); + if (IS_ERR(rq)) return PTR_ERR(rq); - } i915_request_get(rq); i915_request_add(rq); - mutex_unlock(&i915->drm.struct_mutex); } err = 0; @@ -205,7 +200,6 @@ static int __live_parallel_switch1(void *data) static int __live_parallel_switchN(void *data) { struct parallel_switch *arg = data; - struct drm_i915_private *i915 = arg->ce[0]->engine->i915; IGT_TIMEOUT(end_time); unsigned long count; int n; @@ -215,15 +209,11 @@ static int __live_parallel_switchN(void *data) for (n = 0; n < ARRAY_SIZE(arg->ce); n++) { struct i915_request *rq; - mutex_lock(&i915->drm.struct_mutex); rq = i915_request_create(arg->ce[n]); - if (IS_ERR(rq)) { - mutex_unlock(&i915->drm.struct_mutex); + if (IS_ERR(rq)) return PTR_ERR(rq); - } i915_request_add(rq); - mutex_unlock(&i915->drm.struct_mutex); } count++; @@ -1173,7 +1163,7 @@ __sseu_test(const char *name, if (ret) return ret; - ret = __intel_context_reconfigure_sseu(ce, sseu); + ret = intel_context_reconfigure_sseu(ce, sseu); if (ret) goto out_spin; @@ -1277,7 +1267,7 @@ __igt_ctx_sseu(struct drm_i915_private *i915, goto out_fail; out_fail: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) ret = -EIO; intel_context_unpin(ce); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 36aca1c172e77..856b8e467ee81 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -581,12 +581,8 @@ static void disable_retire_worker(struct drm_i915_private *i915) static void restore_retire_worker(struct drm_i915_private *i915) { + igt_flush_test(i915); intel_gt_pm_put(&i915->gt); - - mutex_lock(&i915->drm.struct_mutex); - igt_flush_test(i915, I915_WAIT_LOCKED); - mutex_unlock(&i915->drm.struct_mutex); - i915_gem_driver_register__shrinker(i915); } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c index c21d747e7d05b..9ec55b3a3815e 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c @@ -65,9 +65,7 @@ static int igt_fill_blt(void *arg) if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) obj->cache_dirty = true; - mutex_lock(&i915->drm.struct_mutex); err = i915_gem_object_fill_blt(obj, ce, val); - mutex_unlock(&i915->drm.struct_mutex); if (err) goto err_unpin; @@ -166,9 +164,7 @@ static int igt_copy_blt(void *arg) if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) dst->cache_dirty = true; - mutex_lock(&i915->drm.struct_mutex); err = i915_gem_object_copy_blt(src, dst, ce); - mutex_unlock(&i915->drm.struct_mutex); if (err) goto err_unpin; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 29fa1dabbc2e0..d4cefdd384313 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -196,26 +196,14 @@ int intel_gt_resume(struct intel_gt *gt) static void wait_for_idle(struct intel_gt *gt) { - mutex_lock(>->i915->drm.struct_mutex); /* XXX */ - do { - if (i915_gem_wait_for_idle(gt->i915, - I915_WAIT_LOCKED, - I915_GEM_IDLE_TIMEOUT) == -ETIME) { - /* XXX hide warning from gem_eio */ - if (i915_modparams.reset) { - dev_err(gt->i915->drm.dev, - "Failed to idle engines, declaring wedged!\n"); - GEM_TRACE_DUMP(); - } - - /* - * Forcibly cancel outstanding work and leave - * the gpu quiet. - */ - intel_gt_set_wedged(gt); - } - } while (i915_retire_requests(gt->i915)); - mutex_unlock(>->i915->drm.struct_mutex); + if (i915_gem_wait_for_idle(gt->i915, 0, + I915_GEM_IDLE_TIMEOUT) == -ETIME) { + /* + * Forcibly cancel outstanding work and leave + * the gpu quiet. + */ + intel_gt_set_wedged(gt); + } intel_gt_pm_wait_for_idle(gt); } diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c index e6bcbe7ab5e11..86cffbb0a9cb6 100644 --- a/drivers/gpu/drm/i915/gt/selftest_context.c +++ b/drivers/gpu/drm/i915/gt/selftest_context.c @@ -318,7 +318,7 @@ static int live_active_context(void *arg) if (err) break; - err = igt_flush_test(gt->i915, I915_WAIT_LOCKED); + err = igt_flush_test(gt->i915); if (err) break; } @@ -431,7 +431,7 @@ static int live_remote_context(void *arg) if (err) break; - err = igt_flush_test(gt->i915, I915_WAIT_LOCKED); + err = igt_flush_test(gt->i915); if (err) break; } diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index d3bee9f880088..ffbb3d23b8873 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -58,7 +58,9 @@ static int hang_init(struct hang *h, struct intel_gt *gt) memset(h, 0, sizeof(*h)); h->gt = gt; + mutex_lock(>->i915->drm.struct_mutex); h->ctx = kernel_context(gt->i915); + mutex_unlock(>->i915->drm.struct_mutex); if (IS_ERR(h->ctx)) return PTR_ERR(h->ctx); @@ -285,7 +287,7 @@ static void hang_fini(struct hang *h) kernel_context_close(h->ctx); - igt_flush_test(h->gt->i915, I915_WAIT_LOCKED); + igt_flush_test(h->gt->i915); } static bool wait_until_running(struct hang *h, struct i915_request *rq) @@ -309,10 +311,9 @@ static int igt_hang_sanitycheck(void *arg) /* Basic check that we can execute our hanging batch */ - mutex_lock(>->i915->drm.struct_mutex); err = hang_init(&h, gt); if (err) - goto unlock; + return err; for_each_engine(engine, gt->i915, id) { struct intel_wedge_me w; @@ -355,8 +356,6 @@ static int igt_hang_sanitycheck(void *arg) fini: hang_fini(&h); -unlock: - mutex_unlock(>->i915->drm.struct_mutex); return err; } @@ -395,8 +394,6 @@ static int igt_reset_nop(void *arg) reset_count = i915_reset_count(global); count = 0; do { - mutex_lock(>->i915->drm.struct_mutex); - for_each_engine(engine, gt->i915, id) { int i; @@ -417,7 +414,6 @@ static int igt_reset_nop(void *arg) intel_gt_reset(gt, ALL_ENGINES, NULL); igt_global_reset_unlock(gt); - mutex_unlock(>->i915->drm.struct_mutex); if (intel_gt_is_wedged(gt)) { err = -EIO; break; @@ -429,16 +425,13 @@ static int igt_reset_nop(void *arg) break; } - err = igt_flush_test(gt->i915, 0); + err = igt_flush_test(gt->i915); if (err) break; } while (time_before(jiffies, end_time)); pr_info("%s: %d resets\n", __func__, count); - mutex_lock(>->i915->drm.struct_mutex); - err = igt_flush_test(gt->i915, I915_WAIT_LOCKED); - mutex_unlock(>->i915->drm.struct_mutex); - + err = igt_flush_test(gt->i915); out: mock_file_free(gt->i915, file); if (intel_gt_is_wedged(gt)) @@ -494,7 +487,6 @@ static int igt_reset_nop_engine(void *arg) break; } - mutex_lock(>->i915->drm.struct_mutex); for (i = 0; i < 16; i++) { struct i915_request *rq; @@ -507,7 +499,6 @@ static int igt_reset_nop_engine(void *arg) i915_request_add(rq); } err = intel_engine_reset(engine, NULL); - mutex_unlock(>->i915->drm.struct_mutex); if (err) { pr_err("i915_reset_engine failed\n"); break; @@ -533,15 +524,12 @@ static int igt_reset_nop_engine(void *arg) if (err) break; - err = igt_flush_test(gt->i915, 0); + err = igt_flush_test(gt->i915); if (err) break; } - mutex_lock(>->i915->drm.struct_mutex); - err = igt_flush_test(gt->i915, I915_WAIT_LOCKED); - mutex_unlock(>->i915->drm.struct_mutex); - + err = igt_flush_test(gt->i915); out: mock_file_free(gt->i915, file); if (intel_gt_is_wedged(gt)) @@ -563,9 +551,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) return 0; if (active) { - mutex_lock(>->i915->drm.struct_mutex); err = hang_init(&h, gt); - mutex_unlock(>->i915->drm.struct_mutex); if (err) return err; } @@ -593,17 +579,14 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) if (active) { struct i915_request *rq; - mutex_lock(>->i915->drm.struct_mutex); rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); - mutex_unlock(>->i915->drm.struct_mutex); break; } i915_request_get(rq); i915_request_add(rq); - mutex_unlock(>->i915->drm.struct_mutex); if (!wait_until_running(&h, rq)) { struct drm_printer p = drm_info_printer(gt->i915->drm.dev); @@ -647,7 +630,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) if (err) break; - err = igt_flush_test(gt->i915, 0); + err = igt_flush_test(gt->i915); if (err) break; } @@ -655,11 +638,8 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) if (intel_gt_is_wedged(gt)) err = -EIO; - if (active) { - mutex_lock(>->i915->drm.struct_mutex); + if (active) hang_fini(&h); - mutex_unlock(>->i915->drm.struct_mutex); - } return err; } @@ -741,10 +721,8 @@ static int active_engine(void *data) struct i915_request *old = rq[idx]; struct i915_request *new; - mutex_lock(&engine->i915->drm.struct_mutex); new = igt_request_alloc(ctx[idx], engine); if (IS_ERR(new)) { - mutex_unlock(&engine->i915->drm.struct_mutex); err = PTR_ERR(new); break; } @@ -755,7 +733,6 @@ static int active_engine(void *data) rq[idx] = i915_request_get(new); i915_request_add(new); - mutex_unlock(&engine->i915->drm.struct_mutex); err = active_request_put(old); if (err) @@ -795,9 +772,7 @@ static int __igt_reset_engines(struct intel_gt *gt, return 0; if (flags & TEST_ACTIVE) { - mutex_lock(>->i915->drm.struct_mutex); err = hang_init(&h, gt); - mutex_unlock(>->i915->drm.struct_mutex); if (err) return err; @@ -855,17 +830,14 @@ static int __igt_reset_engines(struct intel_gt *gt, struct i915_request *rq = NULL; if (flags & TEST_ACTIVE) { - mutex_lock(>->i915->drm.struct_mutex); rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); - mutex_unlock(>->i915->drm.struct_mutex); break; } i915_request_get(rq); i915_request_add(rq); - mutex_unlock(>->i915->drm.struct_mutex); if (!wait_until_running(&h, rq)) { struct drm_printer p = drm_info_printer(gt->i915->drm.dev); @@ -977,9 +949,7 @@ static int __igt_reset_engines(struct intel_gt *gt, if (err) break; - mutex_lock(>->i915->drm.struct_mutex); - err = igt_flush_test(gt->i915, I915_WAIT_LOCKED); - mutex_unlock(>->i915->drm.struct_mutex); + err = igt_flush_test(gt->i915); if (err) break; } @@ -987,11 +957,8 @@ static int __igt_reset_engines(struct intel_gt *gt, if (intel_gt_is_wedged(gt)) err = -EIO; - if (flags & TEST_ACTIVE) { - mutex_lock(>->i915->drm.struct_mutex); + if (flags & TEST_ACTIVE) hang_fini(&h); - mutex_unlock(>->i915->drm.struct_mutex); - } return err; } @@ -1061,7 +1028,6 @@ static int igt_reset_wait(void *arg) igt_global_reset_lock(gt); - mutex_lock(>->i915->drm.struct_mutex); err = hang_init(&h, gt); if (err) goto unlock; @@ -1109,7 +1075,6 @@ static int igt_reset_wait(void *arg) fini: hang_fini(&h); unlock: - mutex_unlock(>->i915->drm.struct_mutex); igt_global_reset_unlock(gt); if (intel_gt_is_wedged(gt)) @@ -1189,10 +1154,9 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, /* Check that we can recover an unbind stuck on a hanging request */ - mutex_lock(>->i915->drm.struct_mutex); err = hang_init(&h, gt); if (err) - goto unlock; + return err; obj = i915_gem_object_create_internal(gt->i915, SZ_1M); if (IS_ERR(obj)) { @@ -1255,8 +1219,6 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, if (err) goto out_rq; - mutex_unlock(>->i915->drm.struct_mutex); - if (!wait_until_running(&h, rq)) { struct drm_printer p = drm_info_printer(gt->i915->drm.dev); @@ -1305,16 +1267,12 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, put_task_struct(tsk); } - mutex_lock(>->i915->drm.struct_mutex); out_rq: i915_request_put(rq); out_obj: i915_gem_object_put(obj); fini: hang_fini(&h); -unlock: - mutex_unlock(>->i915->drm.struct_mutex); - if (intel_gt_is_wedged(gt)) return -EIO; @@ -1396,7 +1354,6 @@ static int igt_reset_queue(void *arg) igt_global_reset_lock(gt); - mutex_lock(>->i915->drm.struct_mutex); err = hang_init(&h, gt); if (err) goto unlock; @@ -1511,7 +1468,7 @@ static int igt_reset_queue(void *arg) i915_request_put(prev); - err = igt_flush_test(gt->i915, I915_WAIT_LOCKED); + err = igt_flush_test(gt->i915); if (err) break; } @@ -1519,7 +1476,6 @@ static int igt_reset_queue(void *arg) fini: hang_fini(&h); unlock: - mutex_unlock(>->i915->drm.struct_mutex); igt_global_reset_unlock(gt); if (intel_gt_is_wedged(gt)) @@ -1546,11 +1502,9 @@ static int igt_handle_error(void *arg) if (!engine || !intel_engine_can_store_dword(engine)) return 0; - mutex_lock(>->i915->drm.struct_mutex); - err = hang_init(&h, gt); if (err) - goto err_unlock; + return err; rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { @@ -1574,8 +1528,6 @@ static int igt_handle_error(void *arg) goto err_request; } - mutex_unlock(>->i915->drm.struct_mutex); - /* Temporarily disable error capture */ error = xchg(&global->first_error, (void *)-1); @@ -1583,8 +1535,6 @@ static int igt_handle_error(void *arg) xchg(&global->first_error, error); - mutex_lock(>->i915->drm.struct_mutex); - if (rq->fence.error != -EIO) { pr_err("Guilty request not identified!\n"); err = -EINVAL; @@ -1595,8 +1545,6 @@ static int igt_handle_error(void *arg) i915_request_put(rq); err_fini: hang_fini(&h); -err_unlock: - mutex_unlock(>->i915->drm.struct_mutex); return err; } @@ -1689,7 +1637,6 @@ static int igt_reset_engines_atomic(void *arg) return 0; igt_global_reset_lock(gt); - mutex_lock(>->i915->drm.struct_mutex); /* Flush any requests before we get started and check basics */ if (!igt_force_reset(gt)) @@ -1709,9 +1656,7 @@ static int igt_reset_engines_atomic(void *arg) out: /* As we poke around the guts, do a full reset before continuing. */ igt_force_reset(gt); - unlock: - mutex_unlock(>->i915->drm.struct_mutex); igt_global_reset_unlock(gt); return err; @@ -1751,10 +1696,6 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) err = intel_gt_live_subtests(tests, gt); - mutex_lock(>->i915->drm.struct_mutex); - igt_flush_test(gt->i915, I915_WAIT_LOCKED); - mutex_unlock(>->i915->drm.struct_mutex); - i915_modparams.enable_hangcheck = saved_hangcheck; intel_runtime_pm_put(>->i915->runtime_pm, wakeref); diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index dd25636abc5bc..04c1cf5736426 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -61,7 +61,7 @@ static int live_sanitycheck(void *arg) } igt_spinner_end(&spin); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) { + if (igt_flush_test(i915)) { err = -EIO; goto err_ctx; } @@ -384,8 +384,7 @@ slice_semaphore_queue(struct intel_engine_cs *outer, if (err) goto out; - if (i915_request_wait(head, - I915_WAIT_LOCKED, + if (i915_request_wait(head, 0, 2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) { pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n", count, n); @@ -457,7 +456,7 @@ static int live_timeslice_preempt(void *arg) if (err) goto err_pin; - if (igt_flush_test(i915, I915_WAIT_LOCKED)) { + if (igt_flush_test(i915)) { err = -EIO; goto err_pin; } @@ -1010,7 +1009,7 @@ static int live_nopreempt(void *arg) goto err_wedged; } - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) goto err_wedged; } @@ -1075,7 +1074,7 @@ static int live_suppress_self_preempt(void *arg) if (!intel_engine_has_preemption(engine)) continue; - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) goto err_wedged; intel_engine_pm_get(engine); @@ -1136,7 +1135,7 @@ static int live_suppress_self_preempt(void *arg) } intel_engine_pm_put(engine); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) goto err_wedged; } @@ -1297,7 +1296,7 @@ static int live_suppress_wait_preempt(void *arg) for (i = 0; i < ARRAY_SIZE(client); i++) igt_spinner_end(&client[i].spin); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) goto err_wedged; if (engine->execlists.preempt_hang.count) { @@ -1576,7 +1575,7 @@ static int live_preempt_hang(void *arg) igt_spinner_end(&spin_hi); igt_spinner_end(&spin_lo); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) { + if (igt_flush_test(i915)) { err = -EIO; goto err_ctx_lo; } @@ -1973,7 +1972,7 @@ static int nop_virtual_engine(struct drm_i915_private *i915, prime, div64_u64(ktime_to_ns(times[1]), prime)); out: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) err = -EIO; for (nc = 0; nc < nctx; nc++) { @@ -2118,7 +2117,7 @@ static int mask_virtual_engine(struct drm_i915_private *i915, goto out; out: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) err = -EIO; for (n = 0; n < nsibling; n++) @@ -2296,7 +2295,7 @@ static int bond_virtual_engine(struct drm_i915_private *i915, out: for (n = 0; !IS_ERR(rq[n]); n++) i915_request_put(rq[n]); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) err = -EIO; kernel_context_close(ctx); diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index 3214814031650..16abfabf08c7d 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -6,7 +6,7 @@ #include -#include "gem/i915_gem_pm.h" +#include "intel_engine_pm.h" #include "intel_gt.h" #include "../selftests/i915_random.h" @@ -136,7 +136,6 @@ static int mock_hwsp_freelist(void *arg) goto err_put; } - mutex_lock(&state.i915->drm.struct_mutex); for (p = phases; p->name; p++) { pr_debug("%s(%s)\n", __func__, p->name); for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) { @@ -149,7 +148,6 @@ static int mock_hwsp_freelist(void *arg) out: for (na = 0; na < state.max; na++) __mock_hwsp_record(&state, na, NULL); - mutex_unlock(&state.i915->drm.struct_mutex); kfree(state.history); err_put: drm_dev_put(&state.i915->drm); @@ -449,8 +447,6 @@ tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value) struct i915_request *rq; int err; - lockdep_assert_held(&tl->gt->i915->drm.struct_mutex); /* lazy rq refs */ - err = intel_timeline_pin(tl); if (err) { rq = ERR_PTR(err); @@ -461,10 +457,14 @@ tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value) if (IS_ERR(rq)) goto out_unpin; + i915_request_get(rq); + err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value); i915_request_add(rq); - if (err) + if (err) { + i915_request_put(rq); rq = ERR_PTR(err); + } out_unpin: intel_timeline_unpin(tl); @@ -500,7 +500,6 @@ static int live_hwsp_engine(void *arg) struct intel_timeline **timelines; struct intel_engine_cs *engine; enum intel_engine_id id; - intel_wakeref_t wakeref; unsigned long count, n; int err = 0; @@ -515,14 +514,13 @@ static int live_hwsp_engine(void *arg) if (!timelines) return -ENOMEM; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - count = 0; for_each_engine(engine, i915, id) { if (!intel_engine_can_store_dword(engine)) continue; + intel_engine_pm_get(engine); + for (n = 0; n < NUM_TIMELINES; n++) { struct intel_timeline *tl; struct i915_request *rq; @@ -530,22 +528,26 @@ static int live_hwsp_engine(void *arg) tl = checked_intel_timeline_create(i915); if (IS_ERR(tl)) { err = PTR_ERR(tl); - goto out; + break; } rq = tl_write(tl, engine, count); if (IS_ERR(rq)) { intel_timeline_put(tl); err = PTR_ERR(rq); - goto out; + break; } timelines[count++] = tl; + i915_request_put(rq); } + + intel_engine_pm_put(engine); + if (err) + break; } -out: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) err = -EIO; for (n = 0; n < count; n++) { @@ -559,11 +561,7 @@ static int live_hwsp_engine(void *arg) intel_timeline_put(tl); } - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - kvfree(timelines); - return err; #undef NUM_TIMELINES } @@ -575,7 +573,6 @@ static int live_hwsp_alternate(void *arg) struct intel_timeline **timelines; struct intel_engine_cs *engine; enum intel_engine_id id; - intel_wakeref_t wakeref; unsigned long count, n; int err = 0; @@ -591,9 +588,6 @@ static int live_hwsp_alternate(void *arg) if (!timelines) return -ENOMEM; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - count = 0; for (n = 0; n < NUM_TIMELINES; n++) { for_each_engine(engine, i915, id) { @@ -605,11 +599,14 @@ static int live_hwsp_alternate(void *arg) tl = checked_intel_timeline_create(i915); if (IS_ERR(tl)) { + intel_engine_pm_put(engine); err = PTR_ERR(tl); goto out; } + intel_engine_pm_get(engine); rq = tl_write(tl, engine, count); + intel_engine_pm_put(engine); if (IS_ERR(rq)) { intel_timeline_put(tl); err = PTR_ERR(rq); @@ -617,11 +614,12 @@ static int live_hwsp_alternate(void *arg) } timelines[count++] = tl; + i915_request_put(rq); } } out: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) err = -EIO; for (n = 0; n < count; n++) { @@ -635,11 +633,7 @@ static int live_hwsp_alternate(void *arg) intel_timeline_put(tl); } - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - kvfree(timelines); - return err; #undef NUM_TIMELINES } @@ -650,7 +644,6 @@ static int live_hwsp_wrap(void *arg) struct intel_engine_cs *engine; struct intel_timeline *tl; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = 0; /* @@ -658,14 +651,10 @@ static int live_hwsp_wrap(void *arg) * foreign GPU references. */ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - tl = intel_timeline_create(&i915->gt, NULL); - if (IS_ERR(tl)) { - err = PTR_ERR(tl); - goto out_rpm; - } + if (IS_ERR(tl)) + return PTR_ERR(tl); + if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline) goto out_free; @@ -681,7 +670,9 @@ static int live_hwsp_wrap(void *arg) if (!intel_engine_can_store_dword(engine)) continue; + intel_engine_pm_get(engine); rq = i915_request_create(engine->kernel_context); + intel_engine_pm_put(engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto out; @@ -747,16 +738,12 @@ static int live_hwsp_wrap(void *arg) } out: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) err = -EIO; intel_timeline_unpin(tl); out_free: intel_timeline_put(tl); -out_rpm: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - return err; } @@ -765,7 +752,6 @@ static int live_hwsp_recycle(void *arg) struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine; enum intel_engine_id id; - intel_wakeref_t wakeref; unsigned long count; int err = 0; @@ -775,9 +761,6 @@ static int live_hwsp_recycle(void *arg) * want to confuse ourselves or the GPU. */ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - count = 0; for_each_engine(engine, i915, id) { IGT_TIMEOUT(end_time); @@ -785,6 +768,8 @@ static int live_hwsp_recycle(void *arg) if (!intel_engine_can_store_dword(engine)) continue; + intel_engine_pm_get(engine); + do { struct intel_timeline *tl; struct i915_request *rq; @@ -792,21 +777,22 @@ static int live_hwsp_recycle(void *arg) tl = checked_intel_timeline_create(i915); if (IS_ERR(tl)) { err = PTR_ERR(tl); - goto out; + break; } rq = tl_write(tl, engine, count); if (IS_ERR(rq)) { intel_timeline_put(tl); err = PTR_ERR(rq); - goto out; + break; } if (i915_request_wait(rq, 0, HZ / 5) < 0) { pr_err("Wait for timeline writes timed out!\n"); + i915_request_put(rq); intel_timeline_put(tl); err = -EIO; - goto out; + break; } if (*tl->hwsp_seqno != count) { @@ -815,17 +801,18 @@ static int live_hwsp_recycle(void *arg) err = -EINVAL; } + i915_request_put(rq); intel_timeline_put(tl); count++; if (err) - goto out; + break; } while (!__igt_timeout(end_time, NULL)); - } -out: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); + intel_engine_pm_put(engine); + if (err) + break; + } return err; } diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index d40ce0709bff4..4ee2e2babd0da 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -676,7 +676,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, break; } - if (igt_flush_test(ctx->i915, I915_WAIT_LOCKED)) + if (igt_flush_test(ctx->i915)) err = -EIO; out_batch: i915_vma_unpin_and_release(&batch, 0); @@ -1090,7 +1090,7 @@ static int live_isolated_whitelist(void *arg) kernel_context_close(client[i].ctx); } - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) err = -EIO; return err; @@ -1248,7 +1248,7 @@ live_engine_reset_workarounds(void *arg) igt_global_reset_unlock(&i915->gt); kernel_context_close(ctx); - igt_flush_test(i915, I915_WAIT_LOCKED); + igt_flush_test(i915); return ret; } diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index fec9fb7cc384b..385289895107b 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3621,6 +3621,7 @@ static int i915_drop_caches_set(void *data, u64 val) { struct drm_i915_private *i915 = data; + int ret; DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n", val, val & DROP_ALL); @@ -3630,40 +3631,21 @@ i915_drop_caches_set(void *data, u64 val) I915_IDLE_ENGINES_TIMEOUT)) intel_gt_set_wedged(&i915->gt); - /* No need to check and wait for gpu resets, only libdrm auto-restarts - * on ioctls on -EAGAIN. */ - if (val & (DROP_ACTIVE | DROP_IDLE | DROP_RETIRE | DROP_RESET_SEQNO)) { - int ret; + if (val & DROP_RETIRE) + i915_retire_requests(i915); - ret = mutex_lock_interruptible(&i915->drm.struct_mutex); + if (val & (DROP_IDLE | DROP_ACTIVE)) { + ret = i915_gem_wait_for_idle(i915, + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT); if (ret) return ret; + } - /* - * To finish the flush of the idle_worker, we must complete - * the switch-to-kernel-context, which requires a double - * pass through wait_for_idle: first queues the switch, - * second waits for the switch. - */ - if (ret == 0 && val & (DROP_IDLE | DROP_ACTIVE)) - ret = i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - - if (ret == 0 && val & DROP_IDLE) - ret = i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - - if (val & DROP_RETIRE) - i915_retire_requests(i915); - - mutex_unlock(&i915->drm.struct_mutex); - - if (ret == 0 && val & DROP_IDLE) - ret = intel_gt_pm_wait_for_idle(&i915->gt); + if (val & DROP_IDLE) { + ret = intel_gt_pm_wait_for_idle(&i915->gt); + if (ret) + return ret; } if (val & DROP_RESET_ACTIVE && intel_gt_terminally_wedged(&i915->gt)) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b0aa0a7c680f8..83c2c7bf1e349 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -945,19 +945,16 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, if (!intel_gt_pm_is_awake(gt)) return 0; - GEM_TRACE("flags=%x (%s), timeout=%ld%s\n", - flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked", - timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : ""); - - timeout = wait_for_timelines(gt, flags, timeout); - if (timeout < 0) - return timeout; + do { + timeout = wait_for_timelines(gt, flags, timeout); + if (timeout < 0) + return timeout; - if (flags & I915_WAIT_LOCKED) { - lockdep_assert_held(&i915->drm.struct_mutex); + cond_resched(); + if (signal_pending(current)) + return -EINTR; - i915_retire_requests(i915); - } + } while (i915_retire_requests(i915)); return 0; } diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 91a885c36c6b6..621fb33cda30d 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -308,10 +308,9 @@ long i915_request_wait(struct i915_request *rq, long timeout) __attribute__((nonnull(1))); #define I915_WAIT_INTERRUPTIBLE BIT(0) -#define I915_WAIT_LOCKED BIT(1) /* struct_mutex held, handle GPU reset */ -#define I915_WAIT_PRIORITY BIT(2) /* small priority bump for the request */ -#define I915_WAIT_ALL BIT(3) /* used by i915_gem_object_wait() */ -#define I915_WAIT_FOR_IDLE_BOOST BIT(4) +#define I915_WAIT_PRIORITY BIT(1) /* small priority bump for the request */ +#define I915_WAIT_ALL BIT(2) /* used by i915_gem_object_wait() */ +#define I915_WAIT_FOR_IDLE_BOOST BIT(3) static inline bool i915_request_signaled(const struct i915_request *rq) { diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c index 2cc71bcf884fe..268192b5613bd 100644 --- a/drivers/gpu/drm/i915/selftests/i915_active.c +++ b/drivers/gpu/drm/i915/selftests/i915_active.c @@ -162,10 +162,8 @@ static int live_active_wait(void *arg) __live_put(active); - mutex_lock(&i915->drm.struct_mutex); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -183,10 +181,8 @@ static int live_active_retire(void *arg) return PTR_ERR(active); /* waits for & retires all requests */ - mutex_lock(&i915->drm.struct_mutex); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); if (!READ_ONCE(active->retired)) { pr_err("i915_active not retired after flushing!\n"); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index 75a4695b82bb7..52d2df843148e 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -523,7 +523,7 @@ static int igt_evict_contexts(void *arg) mutex_lock(&i915->ggtt.vm.mutex); out_locked: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) err = -EIO; while (reserved) { struct reserved *next = reserved->next; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 02749bbfd0cfc..e40e6cfa51f12 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -1705,12 +1705,8 @@ int i915_gem_gtt_mock_selftests(void) err = i915_subtests(tests, ggtt); - mutex_lock(&i915->drm.struct_mutex); mock_device_flush(i915); - mutex_unlock(&i915->drm.struct_mutex); - i915_gem_drain_freed_objects(i915); - mock_fini_ggtt(ggtt); kfree(ggtt); out_put: @@ -2006,7 +2002,7 @@ static int igt_cs_tlb(void *arg) } } end: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) err = -EIO; i915_gem_context_unlock_engines(ctx); i915_gem_object_unpin_map(out); diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index eb175da485475..d7d68c6a6bd50 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -41,21 +41,16 @@ static int igt_add_request(void *arg) { struct drm_i915_private *i915 = arg; struct i915_request *request; - int err = -ENOMEM; /* Basic preliminary test to create a request and let it loose! */ - mutex_lock(&i915->drm.struct_mutex); request = mock_request(i915->engine[RCS0]->kernel_context, HZ / 10); if (!request) - goto out_unlock; + return -ENOMEM; i915_request_add(request); - err = 0; -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - return err; + return 0; } static int igt_wait_request(void *arg) @@ -67,12 +62,10 @@ static int igt_wait_request(void *arg) /* Submit a request, then wait upon it */ - mutex_lock(&i915->drm.struct_mutex); request = mock_request(i915->engine[RCS0]->kernel_context, T); - if (!request) { - err = -ENOMEM; - goto out_unlock; - } + if (!request) + return -ENOMEM; + i915_request_get(request); if (i915_request_wait(request, 0, 0) != -ETIME) { @@ -125,9 +118,7 @@ static int igt_wait_request(void *arg) err = 0; out_request: i915_request_put(request); -out_unlock: mock_device_flush(i915); - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -140,52 +131,45 @@ static int igt_fence_wait(void *arg) /* Submit a request, treat it as a fence and wait upon it */ - mutex_lock(&i915->drm.struct_mutex); request = mock_request(i915->engine[RCS0]->kernel_context, T); - if (!request) { - err = -ENOMEM; - goto out_locked; - } + if (!request) + return -ENOMEM; if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) { pr_err("fence wait success before submit (expected timeout)!\n"); - goto out_locked; + goto out; } i915_request_add(request); - mutex_unlock(&i915->drm.struct_mutex); if (dma_fence_is_signaled(&request->fence)) { pr_err("fence signaled immediately!\n"); - goto out_device; + goto out; } if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) { pr_err("fence wait success after submit (expected timeout)!\n"); - goto out_device; + goto out; } if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) { pr_err("fence wait timed out (expected success)!\n"); - goto out_device; + goto out; } if (!dma_fence_is_signaled(&request->fence)) { pr_err("fence unsignaled after waiting!\n"); - goto out_device; + goto out; } if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) { pr_err("fence wait timed out when complete (expected success)!\n"); - goto out_device; + goto out; } err = 0; -out_device: - mutex_lock(&i915->drm.struct_mutex); -out_locked: +out: mock_device_flush(i915); - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -199,6 +183,8 @@ static int igt_request_rewind(void *arg) mutex_lock(&i915->drm.struct_mutex); ctx[0] = mock_context(i915, "A"); + mutex_unlock(&i915->drm.struct_mutex); + ce = i915_gem_context_get_engine(ctx[0], RCS0); GEM_BUG_ON(IS_ERR(ce)); request = mock_request(ce, 2 * HZ); @@ -211,7 +197,10 @@ static int igt_request_rewind(void *arg) i915_request_get(request); i915_request_add(request); + mutex_lock(&i915->drm.struct_mutex); ctx[1] = mock_context(i915, "B"); + mutex_unlock(&i915->drm.struct_mutex); + ce = i915_gem_context_get_engine(ctx[1], RCS0); GEM_BUG_ON(IS_ERR(ce)); vip = mock_request(ce, 0); @@ -233,7 +222,6 @@ static int igt_request_rewind(void *arg) request->engine->submit_request(request); rcu_read_unlock(); - mutex_unlock(&i915->drm.struct_mutex); if (i915_request_wait(vip, 0, HZ) == -ETIME) { pr_err("timed out waiting for high priority request\n"); @@ -248,14 +236,12 @@ static int igt_request_rewind(void *arg) err = 0; err: i915_request_put(vip); - mutex_lock(&i915->drm.struct_mutex); err_context_1: mock_context_close(ctx[1]); i915_request_put(request); err_context_0: mock_context_close(ctx[0]); mock_device_flush(i915); - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -282,7 +268,6 @@ __live_request_alloc(struct intel_context *ce) static int __igt_breadcrumbs_smoketest(void *arg) { struct smoketest *t = arg; - struct mutex * const BKL = &t->engine->i915->drm.struct_mutex; const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1; const unsigned int total = 4 * t->ncontexts + 1; unsigned int num_waits = 0, num_fences = 0; @@ -337,14 +322,11 @@ static int __igt_breadcrumbs_smoketest(void *arg) struct i915_request *rq; struct intel_context *ce; - mutex_lock(BKL); - ce = i915_gem_context_get_engine(ctx, t->engine->legacy_idx); GEM_BUG_ON(IS_ERR(ce)); rq = t->request_alloc(ce); intel_context_put(ce); if (IS_ERR(rq)) { - mutex_unlock(BKL); err = PTR_ERR(rq); count = n; break; @@ -357,8 +339,6 @@ static int __igt_breadcrumbs_smoketest(void *arg) requests[n] = i915_request_get(rq); i915_request_add(rq); - mutex_unlock(BKL); - if (err >= 0) err = i915_sw_fence_await_dma_fence(wait, &rq->fence, @@ -457,15 +437,15 @@ static int mock_breadcrumbs_smoketest(void *arg) goto out_threads; } - mutex_lock(&t.engine->i915->drm.struct_mutex); for (n = 0; n < t.ncontexts; n++) { + mutex_lock(&t.engine->i915->drm.struct_mutex); t.contexts[n] = mock_context(t.engine->i915, "mock"); + mutex_unlock(&t.engine->i915->drm.struct_mutex); if (!t.contexts[n]) { ret = -ENOMEM; goto out_contexts; } } - mutex_unlock(&t.engine->i915->drm.struct_mutex); for (n = 0; n < ncpus; n++) { threads[n] = kthread_run(__igt_breadcrumbs_smoketest, @@ -495,18 +475,15 @@ static int mock_breadcrumbs_smoketest(void *arg) atomic_long_read(&t.num_fences), ncpus); - mutex_lock(&t.engine->i915->drm.struct_mutex); out_contexts: for (n = 0; n < t.ncontexts; n++) { if (!t.contexts[n]) break; mock_context_close(t.contexts[n]); } - mutex_unlock(&t.engine->i915->drm.struct_mutex); kfree(t.contexts); out_threads: kfree(threads); - return ret; } @@ -539,7 +516,6 @@ static int live_nop_request(void *arg) { struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine; - intel_wakeref_t wakeref; struct igt_live_test t; unsigned int id; int err = -ENODEV; @@ -549,28 +525,25 @@ static int live_nop_request(void *arg) * the overhead of submitting requests to the hardware. */ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - for_each_engine(engine, i915, id) { - struct i915_request *request = NULL; unsigned long n, prime; IGT_TIMEOUT(end_time); ktime_t times[2] = {}; err = igt_live_test_begin(&t, i915, __func__, engine->name); if (err) - goto out_unlock; + return err; for_each_prime_number_from(prime, 1, 8192) { + struct i915_request *request = NULL; + times[1] = ktime_get_raw(); for (n = 0; n < prime; n++) { + i915_request_put(request); request = i915_request_create(engine->kernel_context); - if (IS_ERR(request)) { - err = PTR_ERR(request); - goto out_unlock; - } + if (IS_ERR(request)) + return PTR_ERR(request); /* This space is left intentionally blank. * @@ -585,9 +558,11 @@ static int live_nop_request(void *arg) * for latency. */ + i915_request_get(request); i915_request_add(request); } i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT); + i915_request_put(request); times[1] = ktime_sub(ktime_get_raw(), times[1]); if (prime == 1) @@ -599,7 +574,7 @@ static int live_nop_request(void *arg) err = igt_live_test_end(&t); if (err) - goto out_unlock; + return err; pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n", engine->name, @@ -607,9 +582,6 @@ static int live_nop_request(void *arg) prime, div64_u64(ktime_to_ns(times[1]), prime)); } -out_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -679,6 +651,7 @@ empty_request(struct intel_engine_cs *engine, if (err) goto out_request; + i915_request_get(request); out_request: i915_request_add(request); return err ? ERR_PTR(err) : request; @@ -688,7 +661,6 @@ static int live_empty_request(void *arg) { struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine; - intel_wakeref_t wakeref; struct igt_live_test t; struct i915_vma *batch; unsigned int id; @@ -699,14 +671,9 @@ static int live_empty_request(void *arg) * the overhead of submitting requests to the hardware. */ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - batch = empty_batch(i915); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto out_unlock; - } + if (IS_ERR(batch)) + return PTR_ERR(batch); for_each_engine(engine, i915, id) { IGT_TIMEOUT(end_time); @@ -730,6 +697,7 @@ static int live_empty_request(void *arg) times[1] = ktime_get_raw(); for (n = 0; n < prime; n++) { + i915_request_put(request); request = empty_request(engine, batch); if (IS_ERR(request)) { err = PTR_ERR(request); @@ -745,6 +713,7 @@ static int live_empty_request(void *arg) if (__igt_timeout(end_time, NULL)) break; } + i915_request_put(request); err = igt_live_test_end(&t); if (err) @@ -759,9 +728,6 @@ static int live_empty_request(void *arg) out_batch: i915_vma_unpin(batch); i915_vma_put(batch); -out_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -841,7 +807,6 @@ static int live_all_engines(void *arg) struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine; struct i915_request *request[I915_NUM_ENGINES]; - intel_wakeref_t wakeref; struct igt_live_test t; struct i915_vma *batch; unsigned int id; @@ -852,18 +817,15 @@ static int live_all_engines(void *arg) * block doing so, and that they don't complete too soon. */ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - err = igt_live_test_begin(&t, i915, __func__, ""); if (err) - goto out_unlock; + return err; batch = recursive_batch(i915); if (IS_ERR(batch)) { err = PTR_ERR(batch); pr_err("%s: Unable to create batch, err=%d\n", __func__, err); - goto out_unlock; + return err; } for_each_engine(engine, i915, id) { @@ -933,9 +895,6 @@ static int live_all_engines(void *arg) i915_request_put(request[id]); i915_vma_unpin(batch); i915_vma_put(batch); -out_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -945,7 +904,6 @@ static int live_sequential_engines(void *arg) struct i915_request *request[I915_NUM_ENGINES] = {}; struct i915_request *prev = NULL; struct intel_engine_cs *engine; - intel_wakeref_t wakeref; struct igt_live_test t; unsigned int id; int err; @@ -956,12 +914,9 @@ static int live_sequential_engines(void *arg) * they are running on independent engines. */ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - err = igt_live_test_begin(&t, i915, __func__, ""); if (err) - goto out_unlock; + return err; for_each_engine(engine, i915, id) { struct i915_vma *batch; @@ -971,7 +926,7 @@ static int live_sequential_engines(void *arg) err = PTR_ERR(batch); pr_err("%s: Unable to create batch for %s, err=%d\n", __func__, engine->name, err); - goto out_unlock; + return err; } request[id] = i915_request_create(engine->kernel_context); @@ -1063,9 +1018,6 @@ static int live_sequential_engines(void *arg) i915_vma_put(request[id]->batch); i915_request_put(request[id]); } -out_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -1080,16 +1032,12 @@ static int __live_parallel_engine1(void *arg) struct i915_request *rq; int err; - mutex_lock(&engine->i915->drm.struct_mutex); rq = i915_request_create(engine->kernel_context); - if (IS_ERR(rq)) { - mutex_unlock(&engine->i915->drm.struct_mutex); + if (IS_ERR(rq)) return PTR_ERR(rq); - } i915_request_get(rq); i915_request_add(rq); - mutex_unlock(&engine->i915->drm.struct_mutex); err = 0; if (i915_request_wait(rq, 0, HZ / 5) < 0) @@ -1115,16 +1063,11 @@ static int __live_parallel_engineN(void *arg) do { struct i915_request *rq; - mutex_lock(&engine->i915->drm.struct_mutex); rq = i915_request_create(engine->kernel_context); - if (IS_ERR(rq)) { - mutex_unlock(&engine->i915->drm.struct_mutex); + if (IS_ERR(rq)) return PTR_ERR(rq); - } i915_request_add(rq); - mutex_unlock(&engine->i915->drm.struct_mutex); - count++; } while (!__igt_timeout(end_time, NULL)); @@ -1154,9 +1097,7 @@ static int live_parallel_engines(void *arg) struct task_struct *tsk[I915_NUM_ENGINES] = {}; struct igt_live_test t; - mutex_lock(&i915->drm.struct_mutex); err = igt_live_test_begin(&t, i915, __func__, ""); - mutex_unlock(&i915->drm.struct_mutex); if (err) break; @@ -1184,10 +1125,8 @@ static int live_parallel_engines(void *arg) put_task_struct(tsk[id]); } - mutex_lock(&i915->drm.struct_mutex); if (igt_live_test_end(&t)) err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); } return err; @@ -1280,9 +1219,10 @@ static int live_breadcrumbs_smoketest(void *arg) goto out_threads; } - mutex_lock(&i915->drm.struct_mutex); for (n = 0; n < t[0].ncontexts; n++) { + mutex_lock(&i915->drm.struct_mutex); t[0].contexts[n] = live_context(i915, file); + mutex_unlock(&i915->drm.struct_mutex); if (!t[0].contexts[n]) { ret = -ENOMEM; goto out_contexts; @@ -1299,7 +1239,6 @@ static int live_breadcrumbs_smoketest(void *arg) t[id].max_batch = max_batches(t[0].contexts[0], engine); if (t[id].max_batch < 0) { ret = t[id].max_batch; - mutex_unlock(&i915->drm.struct_mutex); goto out_flush; } /* One ring interleaved between requests from all cpus */ @@ -1314,7 +1253,6 @@ static int live_breadcrumbs_smoketest(void *arg) &t[id], "igt/%d.%d", id, n); if (IS_ERR(tsk)) { ret = PTR_ERR(tsk); - mutex_unlock(&i915->drm.struct_mutex); goto out_flush; } @@ -1322,7 +1260,6 @@ static int live_breadcrumbs_smoketest(void *arg) threads[id * ncpus + n] = tsk; } } - mutex_unlock(&i915->drm.struct_mutex); msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies)); @@ -1350,10 +1287,8 @@ static int live_breadcrumbs_smoketest(void *arg) pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n", num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus); - mutex_lock(&i915->drm.struct_mutex); ret = igt_live_test_end(&live) ?: ret; out_contexts: - mutex_unlock(&i915->drm.struct_mutex); kfree(t[0].contexts); out_threads: kfree(threads); diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c index 438ea0eaa4160..825a8286cbe84 100644 --- a/drivers/gpu/drm/i915/selftests/i915_selftest.c +++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c @@ -263,10 +263,8 @@ int __i915_live_teardown(int err, void *data) { struct drm_i915_private *i915 = data; - mutex_lock(&i915->drm.struct_mutex); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); i915_gem_drain_freed_objects(i915); @@ -284,10 +282,8 @@ int __intel_gt_live_teardown(int err, void *data) { struct intel_gt *gt = data; - mutex_lock(>->i915->drm.struct_mutex); - if (igt_flush_test(gt->i915, I915_WAIT_LOCKED)) + if (igt_flush_test(gt->i915)) err = -EIO; - mutex_unlock(>->i915->drm.struct_mutex); i915_gem_drain_freed_objects(gt->i915); diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index 0e4f66312b393..1c9db08f7c283 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -833,12 +833,8 @@ int i915_vma_mock_selftests(void) err = i915_subtests(tests, ggtt); - mutex_lock(&i915->drm.struct_mutex); mock_device_flush(i915); - mutex_unlock(&i915->drm.struct_mutex); - i915_gem_drain_freed_objects(i915); - mock_fini_ggtt(ggtt); kfree(ggtt); out_put: diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c index d3b5eb402d33f..2a5fbe46ea9f5 100644 --- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c @@ -12,31 +12,25 @@ #include "igt_flush_test.h" -int igt_flush_test(struct drm_i915_private *i915, unsigned int flags) +int igt_flush_test(struct drm_i915_private *i915) { int ret = intel_gt_is_wedged(&i915->gt) ? -EIO : 0; - int repeat = !!(flags & I915_WAIT_LOCKED); cond_resched(); - do { - if (i915_gem_wait_for_idle(i915, flags, HZ / 5) == -ETIME) { - pr_err("%pS timed out, cancelling all further testing.\n", - __builtin_return_address(0)); + i915_retire_requests(i915); + if (i915_gem_wait_for_idle(i915, 0, HZ / 5) == -ETIME) { + pr_err("%pS timed out, cancelling all further testing.\n", + __builtin_return_address(0)); - GEM_TRACE("%pS timed out.\n", - __builtin_return_address(0)); - GEM_TRACE_DUMP(); + GEM_TRACE("%pS timed out.\n", + __builtin_return_address(0)); + GEM_TRACE_DUMP(); - intel_gt_set_wedged(&i915->gt); - repeat = 0; - ret = -EIO; - } - - /* Ensure we also flush after wedging. */ - if (flags & I915_WAIT_LOCKED) - i915_retire_requests(i915); - } while (repeat--); + intel_gt_set_wedged(&i915->gt); + ret = -EIO; + } + i915_retire_requests(i915); return ret; } diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.h b/drivers/gpu/drm/i915/selftests/igt_flush_test.h index 63e009927c432..7541fa74e6418 100644 --- a/drivers/gpu/drm/i915/selftests/igt_flush_test.h +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.h @@ -9,6 +9,6 @@ struct drm_i915_private; -int igt_flush_test(struct drm_i915_private *i915, unsigned int flags); +int igt_flush_test(struct drm_i915_private *i915); #endif /* IGT_FLUSH_TEST_H */ diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c index 3e902761cd166..04a6f88fdf645 100644 --- a/drivers/gpu/drm/i915/selftests/igt_live_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c @@ -19,15 +19,12 @@ int igt_live_test_begin(struct igt_live_test *t, enum intel_engine_id id; int err; - lockdep_assert_held(&i915->drm.struct_mutex); - t->i915 = i915; t->func = func; t->name = name; err = i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED, + I915_WAIT_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); if (err) { pr_err("%s(%s): failed to idle before, with err=%d!", @@ -50,9 +47,7 @@ int igt_live_test_end(struct igt_live_test *t) struct intel_engine_cs *engine; enum intel_engine_id id; - lockdep_assert_held(&i915->drm.struct_mutex); - - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) return -EIO; if (t->reset_global != i915_reset_count(&i915->gpu_error)) { diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 2448067822aff..622bb2127453b 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -41,8 +41,6 @@ void mock_device_flush(struct drm_i915_private *i915) struct intel_engine_cs *engine; enum intel_engine_id id; - lockdep_assert_held(&i915->drm.struct_mutex); - do { for_each_engine(engine, i915, id) mock_engine_flush(engine); @@ -55,9 +53,7 @@ static void mock_device_release(struct drm_device *dev) struct intel_engine_cs *engine; enum intel_engine_id id; - mutex_lock(&i915->drm.struct_mutex); mock_device_flush(i915); - mutex_unlock(&i915->drm.struct_mutex); flush_work(&i915->gem.idle_work); i915_gem_drain_workqueue(i915); From 33d856445b3ef6f00466cd9fbb04d69061143077 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 4 Oct 2019 14:40:03 +0100 Subject: [PATCH 313/331] drm/i915: Remove the GEM idle worker Nothing inside the idle worker now requires struct_mutex, so we can remove the indirection of using our own worker. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-9-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_pm.c | 28 ++----------------- .../drm/i915/gem/selftests/i915_gem_mman.c | 3 -- drivers/gpu/drm/i915/i915_debugfs.c | 5 ---- drivers/gpu/drm/i915/i915_drv.h | 9 ------ .../gpu/drm/i915/selftests/mock_gem_device.c | 6 ---- 5 files changed, 2 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 2ddc3aeaac9d4..26f325bbfe4d2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -13,36 +13,13 @@ static void i915_gem_park(struct drm_i915_private *i915) { - lockdep_assert_held(&i915->drm.struct_mutex); + cancel_delayed_work(&i915->gem.retire_work); i915_vma_parked(i915); i915_globals_park(); } -static void idle_work_handler(struct work_struct *work) -{ - struct drm_i915_private *i915 = - container_of(work, typeof(*i915), gem.idle_work); - bool park; - - cancel_delayed_work_sync(&i915->gem.retire_work); - mutex_lock(&i915->drm.struct_mutex); - - intel_wakeref_lock(&i915->gt.wakeref); - park = (!intel_wakeref_is_active(&i915->gt.wakeref) && - !work_pending(work)); - intel_wakeref_unlock(&i915->gt.wakeref); - if (park) - i915_gem_park(i915); - else - queue_delayed_work(i915->wq, - &i915->gem.retire_work, - round_jiffies_up_relative(HZ)); - - mutex_unlock(&i915->drm.struct_mutex); -} - static void retire_work_handler(struct work_struct *work) { struct drm_i915_private *i915 = @@ -71,7 +48,7 @@ static int pm_notifier(struct notifier_block *nb, break; case INTEL_GT_PARK: - queue_work(i915->wq, &i915->gem.idle_work); + i915_gem_park(i915); break; } @@ -264,7 +241,6 @@ void i915_gem_resume(struct drm_i915_private *i915) void i915_gem_init__pm(struct drm_i915_private *i915) { - INIT_WORK(&i915->gem.idle_work, idle_work_handler); INIT_DELAYED_WORK(&i915->gem.retire_work, retire_work_handler); i915->gem.pm_notifier.notifier_call = pm_notifier; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 856b8e467ee81..4ba6ed5c8313c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -572,11 +572,8 @@ static bool assert_mmap_offset(struct drm_i915_private *i915, static void disable_retire_worker(struct drm_i915_private *i915) { i915_gem_driver_unregister__shrinker(i915); - intel_gt_pm_get(&i915->gt); - cancel_delayed_work_sync(&i915->gem.retire_work); - flush_work(&i915->gem.idle_work); } static void restore_retire_worker(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 385289895107b..7c4bba21adcd6 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3662,11 +3662,6 @@ i915_drop_caches_set(void *data, u64 val) i915_gem_shrink_all(i915); fs_reclaim_release(GFP_KERNEL); - if (val & DROP_IDLE) { - flush_delayed_work(&i915->gem.retire_work); - flush_work(&i915->gem.idle_work); - } - if (val & DROP_FREED) i915_gem_drain_freed_objects(i915); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 337d8306416a5..ad31852e43092 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1719,15 +1719,6 @@ struct drm_i915_private { * fires, go retire requests. */ struct delayed_work retire_work; - - /** - * When we detect an idle GPU, we want to turn on - * powersaving features. So once we see that there - * are no more requests outstanding and no more - * arrive within a small period of time, we fire - * off the idle_work. - */ - struct work_struct idle_work; } gem; /* For i945gm vblank irq vs. C3 workaround */ diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 622bb2127453b..a8be5da2b3cf9 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -55,7 +55,6 @@ static void mock_device_release(struct drm_device *dev) mock_device_flush(i915); - flush_work(&i915->gem.idle_work); i915_gem_drain_workqueue(i915); mutex_lock(&i915->drm.struct_mutex); @@ -103,10 +102,6 @@ static void mock_retire_work_handler(struct work_struct *work) { } -static void mock_idle_work_handler(struct work_struct *work) -{ -} - static int pm_domain_resume(struct device *dev) { return pm_generic_runtime_resume(dev); @@ -187,7 +182,6 @@ struct drm_i915_private *mock_gem_device(void) mock_init_contexts(i915); INIT_DELAYED_WORK(&i915->gem.retire_work, mock_retire_work_handler); - INIT_WORK(&i915->gem.idle_work, mock_idle_work_handler); intel_timelines_init(i915); From f33a8a51602c84cc7d5cadd2655835ba3b7d03f9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 4 Oct 2019 14:40:04 +0100 Subject: [PATCH 314/331] drm/i915: Merge wait_for_timelines with retire_request wait_for_timelines is essentially the same loop as retiring requests (with an extra timeout), so merge the two into one routine. v2: i915_retire_requests_timeout and keep VT'd w/a as !interruptible Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-10-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 4 +- drivers/gpu/drm/i915/gem/i915_gem_pm.c | 4 +- .../drm/i915/gem/selftests/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 3 +- drivers/gpu/drm/i915/i915_debugfs.c | 4 +- drivers/gpu/drm/i915/i915_drv.h | 3 +- drivers/gpu/drm/i915/i915_gem.c | 67 ++----------------- drivers/gpu/drm/i915/i915_gem_evict.c | 12 ++-- drivers/gpu/drm/i915/i915_gem_gtt.c | 4 +- drivers/gpu/drm/i915/i915_request.c | 26 ++++++- drivers/gpu/drm/i915/i915_request.h | 7 +- .../gpu/drm/i915/selftests/igt_flush_test.c | 4 +- .../gpu/drm/i915/selftests/igt_live_test.c | 4 +- .../gpu/drm/i915/selftests/mock_gem_device.c | 2 +- 14 files changed, 50 insertions(+), 96 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index c19431d609fca..418d0d2b5fa93 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -432,9 +432,7 @@ static int create_mmap_offset(struct drm_i915_gem_object *obj) /* Attempt to reap some mmap space from dead objects */ do { - err = i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT); + err = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT); if (err) break; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 26f325bbfe4d2..90b211257f2da 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -59,9 +59,7 @@ static bool switch_to_kernel_context_sync(struct intel_gt *gt) { bool result = !intel_gt_is_wedged(gt); - if (i915_gem_wait_for_idle(gt->i915, - I915_WAIT_FOR_IDLE_BOOST, - I915_GEM_IDLE_TIMEOUT) == -ETIME) { + if (i915_gem_wait_for_idle(gt->i915, I915_GEM_IDLE_TIMEOUT) == -ETIME) { /* XXX hide warning from gem_eio */ if (i915_modparams.reset) { dev_err(gt->i915->drm.dev, diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index f5402aad9b5a0..f902aeee17554 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -1137,7 +1137,7 @@ __sseu_finish(const char *name, if ((flags & TEST_IDLE) && ret == 0) { ret = i915_gem_wait_for_idle(ce->engine->i915, - 0, MAX_SCHEDULE_TIMEOUT); + MAX_SCHEDULE_TIMEOUT); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index d4cefdd384313..bdb34f03ec477 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -196,8 +196,7 @@ int intel_gt_resume(struct intel_gt *gt) static void wait_for_idle(struct intel_gt *gt) { - if (i915_gem_wait_for_idle(gt->i915, 0, - I915_GEM_IDLE_TIMEOUT) == -ETIME) { + if (i915_gem_wait_for_idle(gt->i915, I915_GEM_IDLE_TIMEOUT) == -ETIME) { /* * Forcibly cancel outstanding work and leave * the gpu quiet. diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 7c4bba21adcd6..5888a658e2b77 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3635,9 +3635,7 @@ i915_drop_caches_set(void *data, u64 val) i915_retire_requests(i915); if (val & (DROP_IDLE | DROP_ACTIVE)) { - ret = i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT); + ret = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT); if (ret) return ret; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ad31852e43092..44f3463ff9f17 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2321,8 +2321,7 @@ void i915_gem_driver_register(struct drm_i915_private *i915); void i915_gem_driver_unregister(struct drm_i915_private *i915); void i915_gem_driver_remove(struct drm_i915_private *dev_priv); void i915_gem_driver_release(struct drm_i915_private *dev_priv); -int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, - unsigned int flags, long timeout); +int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, long timeout); void i915_gem_suspend(struct drm_i915_private *dev_priv); void i915_gem_suspend_late(struct drm_i915_private *dev_priv); void i915_gem_resume(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 83c2c7bf1e349..12d21e5cf3ede 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -883,61 +883,7 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915) } } -static long -wait_for_timelines(struct intel_gt *gt, unsigned int wait, long timeout) -{ - struct intel_gt_timelines *timelines = >->timelines; - struct intel_timeline *tl; - unsigned long flags; - - spin_lock_irqsave(&timelines->lock, flags); - list_for_each_entry(tl, &timelines->active_list, link) { - struct dma_fence *fence; - - fence = i915_active_fence_get(&tl->last_request); - if (!fence) - continue; - - spin_unlock_irqrestore(&timelines->lock, flags); - - if (!dma_fence_is_i915(fence)) { - timeout = dma_fence_wait_timeout(fence, - flags & I915_WAIT_INTERRUPTIBLE, - timeout); - } else { - struct i915_request *rq = to_request(fence); - - /* - * "Race-to-idle". - * - * Switching to the kernel context is often used as - * a synchronous step prior to idling, e.g. in suspend - * for flushing all current operations to memory before - * sleeping. These we want to complete as quickly as - * possible to avoid prolonged stalls, so allow the gpu - * to boost to maximum clocks. - */ - if (flags & I915_WAIT_FOR_IDLE_BOOST) - gen6_rps_boost(rq); - - timeout = i915_request_wait(rq, flags, timeout); - } - - dma_fence_put(fence); - if (timeout < 0) - return timeout; - - /* restart after reacquiring the lock */ - spin_lock_irqsave(&timelines->lock, flags); - tl = list_entry(&timelines->active_list, typeof(*tl), link); - } - spin_unlock_irqrestore(&timelines->lock, flags); - - return timeout; -} - -int i915_gem_wait_for_idle(struct drm_i915_private *i915, - unsigned int flags, long timeout) +int i915_gem_wait_for_idle(struct drm_i915_private *i915, long timeout) { struct intel_gt *gt = &i915->gt; @@ -945,18 +891,13 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, if (!intel_gt_pm_is_awake(gt)) return 0; - do { - timeout = wait_for_timelines(gt, flags, timeout); - if (timeout < 0) - return timeout; - + while ((timeout = i915_retire_requests_timeout(i915, timeout)) > 0) { cond_resched(); if (signal_pending(current)) return -EINTR; + } - } while (i915_retire_requests(i915)); - - return 0; + return timeout; } struct i915_vma * diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 0552bf93eea3c..0a412f6d01d73 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -46,9 +46,7 @@ static int ggtt_flush(struct drm_i915_private *i915) * the hopes that we can then remove contexts and the like only * bound by their active reference. */ - return i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT); + return i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT); } static bool @@ -126,6 +124,8 @@ i915_gem_evict_something(struct i915_address_space *vm, min_size, alignment, color, start, end, mode); + i915_retire_requests(vm->i915); + search_again: active = NULL; INIT_LIST_HEAD(&eviction_list); @@ -264,13 +264,13 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, trace_i915_gem_evict_node(vm, target, flags); - /* Retire before we search the active list. Although we have + /* + * Retire before we search the active list. Although we have * reasonable accuracy in our retirement lists, we may have * a stray pin (preventing eviction) that can only be resolved by * retiring. */ - if (!(flags & PIN_NONBLOCK)) - i915_retire_requests(vm->i915); + i915_retire_requests(vm->i915); if (i915_vm_has_cache_coloring(vm)) { /* Expand search to cover neighbouring guard pages (or lack!) */ diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 7462d87f7a48c..082fcf9085a6b 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2528,7 +2528,9 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, struct i915_ggtt *ggtt = &dev_priv->ggtt; if (unlikely(ggtt->do_idle_maps)) { - if (i915_gem_wait_for_idle(dev_priv, 0, MAX_SCHEDULE_TIMEOUT)) { + /* XXX This does not prevent more requests being submitted! */ + if (i915_retire_requests_timeout(dev_priv, + -MAX_SCHEDULE_TIMEOUT)) { DRM_ERROR("Failed to wait for idle; VT'd may hang.\n"); /* Wait a bit, in hopes it avoids the hang */ udelay(10); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 4ffe62a421864..52f7c4e5b6441 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1508,13 +1508,19 @@ long i915_request_wait(struct i915_request *rq, return timeout; } -bool i915_retire_requests(struct drm_i915_private *i915) +long i915_retire_requests_timeout(struct drm_i915_private *i915, long timeout) { struct intel_gt_timelines *timelines = &i915->gt.timelines; struct intel_timeline *tl, *tn; + unsigned long active_count = 0; unsigned long flags; + bool interruptible; LIST_HEAD(free); + interruptible = true; + if (timeout < 0) + timeout = -timeout, interruptible = false; + spin_lock_irqsave(&timelines->lock, flags); list_for_each_entry_safe(tl, tn, &timelines->active_list, link) { if (!mutex_trylock(&tl->mutex)) @@ -1525,13 +1531,27 @@ bool i915_retire_requests(struct drm_i915_private *i915) tl->active_count++; /* pin the list element */ spin_unlock_irqrestore(&timelines->lock, flags); + if (timeout > 0) { + struct dma_fence *fence; + + fence = i915_active_fence_get(&tl->last_request); + if (fence) { + timeout = dma_fence_wait_timeout(fence, + interruptible, + timeout); + dma_fence_put(fence); + } + } + retire_requests(tl); spin_lock_irqsave(&timelines->lock, flags); /* Resume iteration after dropping lock */ list_safe_reset_next(tl, tn, link); - if (!--tl->active_count) + if (--tl->active_count) + active_count += !!rcu_access_pointer(tl->last_request.fence); + else list_del(&tl->link); mutex_unlock(&tl->mutex); @@ -1547,7 +1567,7 @@ bool i915_retire_requests(struct drm_i915_private *i915) list_for_each_entry_safe(tl, tn, &free, link) __intel_timeline_free(&tl->kref); - return !list_empty(&timelines->active_list); + return active_count ? timeout : 0; } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 621fb33cda30d..256b0715180f4 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -310,7 +310,6 @@ long i915_request_wait(struct i915_request *rq, #define I915_WAIT_INTERRUPTIBLE BIT(0) #define I915_WAIT_PRIORITY BIT(1) /* small priority bump for the request */ #define I915_WAIT_ALL BIT(2) /* used by i915_gem_object_wait() */ -#define I915_WAIT_FOR_IDLE_BOOST BIT(3) static inline bool i915_request_signaled(const struct i915_request *rq) { @@ -460,6 +459,10 @@ i915_request_active_timeline(struct i915_request *rq) lockdep_is_held(&rq->engine->active.lock)); } -bool i915_retire_requests(struct drm_i915_private *i915); +long i915_retire_requests_timeout(struct drm_i915_private *i915, long timeout); +static inline void i915_retire_requests(struct drm_i915_private *i915) +{ + i915_retire_requests_timeout(i915, 0); +} #endif /* I915_REQUEST_H */ diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c index 2a5fbe46ea9f5..ed496bd6d84fa 100644 --- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c @@ -18,8 +18,7 @@ int igt_flush_test(struct drm_i915_private *i915) cond_resched(); - i915_retire_requests(i915); - if (i915_gem_wait_for_idle(i915, 0, HZ / 5) == -ETIME) { + if (i915_gem_wait_for_idle(i915, HZ / 5) == -ETIME) { pr_err("%pS timed out, cancelling all further testing.\n", __builtin_return_address(0)); @@ -30,7 +29,6 @@ int igt_flush_test(struct drm_i915_private *i915) intel_gt_set_wedged(&i915->gt); ret = -EIO; } - i915_retire_requests(i915); return ret; } diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c index 04a6f88fdf645..eae90f97df6c7 100644 --- a/drivers/gpu/drm/i915/selftests/igt_live_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c @@ -23,9 +23,7 @@ int igt_live_test_begin(struct igt_live_test *t, t->func = func; t->name = name; - err = i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT); + err = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT); if (err) { pr_err("%s(%s): failed to idle before, with err=%d!", func, name, err); diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index a8be5da2b3cf9..3b589bbb2c2d1 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -44,7 +44,7 @@ void mock_device_flush(struct drm_i915_private *i915) do { for_each_engine(engine, i915, id) mock_engine_flush(engine); - } while (i915_retire_requests(i915)); + } while (i915_retire_requests_timeout(i915, MAX_SCHEDULE_TIMEOUT)); } static void mock_device_release(struct drm_device *dev) From 789ed955215d6f925008cb2381aa9f64cd635dd2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 4 Oct 2019 14:40:05 +0100 Subject: [PATCH 315/331] drm/i915/gem: Retire directly for mmap-offset shrinking Now that we can retire without taking struct_mutex, we can do so to handle shrinking the mmap-offset space after an allocation failure. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Matthew Auld Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-11-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 418d0d2b5fa93..45bbd22c14f1d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -431,19 +431,12 @@ static int create_mmap_offset(struct drm_i915_gem_object *obj) return 0; /* Attempt to reap some mmap space from dead objects */ - do { - err = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT); - if (err) - break; + err = i915_retire_requests_timeout(i915, MAX_SCHEDULE_TIMEOUT); + if (err) + return err; - i915_gem_drain_freed_objects(i915); - err = drm_gem_create_mmap_offset(&obj->base); - if (!err) - break; - - } while (flush_delayed_work(&i915->gem.retire_work)); - - return err; + i915_gem_drain_freed_objects(i915); + return drm_gem_create_mmap_offset(&obj->base); } int From 661019754202d610203a9cf09d26fdd8677e41c6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 4 Oct 2019 14:40:06 +0100 Subject: [PATCH 316/331] drm/i915: Move request runtime management onto gt Requests are run from the gt and are tided into the gt runtime power management, so pull the runtime request management under gt/ Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-12-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/gem/i915_gem_mman.c | 4 +- drivers/gpu/drm/i915/gem/i915_gem_pm.c | 28 +--- .../drm/i915/gem/selftests/i915_gem_context.c | 5 +- .../drm/i915/gem/selftests/i915_gem_mman.c | 2 +- drivers/gpu/drm/i915/gt/intel_gt.c | 2 + drivers/gpu/drm/i915/gt/intel_gt_pm.c | 5 +- drivers/gpu/drm/i915/gt/intel_gt_requests.c | 123 ++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_gt_requests.h | 24 ++++ drivers/gpu/drm/i915/gt/intel_gt_types.h | 11 ++ drivers/gpu/drm/i915/gt/selftest_timeline.c | 8 +- drivers/gpu/drm/i915/i915_debugfs.c | 17 +-- drivers/gpu/drm/i915/i915_drv.h | 10 -- drivers/gpu/drm/i915/i915_gem.c | 17 --- drivers/gpu/drm/i915/i915_gem_evict.c | 14 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 5 +- drivers/gpu/drm/i915/i915_request.c | 64 +-------- drivers/gpu/drm/i915/i915_request.h | 7 +- .../gpu/drm/i915/selftests/igt_flush_test.c | 9 +- .../gpu/drm/i915/selftests/igt_live_test.c | 5 +- .../gpu/drm/i915/selftests/mock_gem_device.c | 10 +- 21 files changed, 213 insertions(+), 158 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_requests.c create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_requests.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index b020b024286d0..4e23f1a5c39f6 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -83,6 +83,7 @@ gt-y += \ gt/intel_gt_irq.o \ gt/intel_gt_pm.o \ gt/intel_gt_pm_irq.o \ + gt/intel_gt_requests.o \ gt/intel_hangcheck.o \ gt/intel_lrc.o \ gt/intel_rc6.o \ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 45bbd22c14f1d..fd4122d8c0a95 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -8,6 +8,7 @@ #include #include "gt/intel_gt.h" +#include "gt/intel_gt_requests.h" #include "i915_drv.h" #include "i915_gem_gtt.h" @@ -424,6 +425,7 @@ void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) static int create_mmap_offset(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct intel_gt *gt = &i915->gt; int err; err = drm_gem_create_mmap_offset(&obj->base); @@ -431,7 +433,7 @@ static int create_mmap_offset(struct drm_i915_gem_object *obj) return 0; /* Attempt to reap some mmap space from dead objects */ - err = i915_retire_requests_timeout(i915, MAX_SCHEDULE_TIMEOUT); + err = intel_gt_retire_requests_timeout(gt, MAX_SCHEDULE_TIMEOUT); if (err) return err; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 90b211257f2da..9194d8464bf73 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -7,31 +7,18 @@ #include "gem/i915_gem_pm.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" +#include "gt/intel_gt_requests.h" #include "i915_drv.h" #include "i915_globals.h" static void i915_gem_park(struct drm_i915_private *i915) { - cancel_delayed_work(&i915->gem.retire_work); - i915_vma_parked(i915); i915_globals_park(); } -static void retire_work_handler(struct work_struct *work) -{ - struct drm_i915_private *i915 = - container_of(work, typeof(*i915), gem.retire_work.work); - - i915_retire_requests(i915); - - queue_delayed_work(i915->wq, - &i915->gem.retire_work, - round_jiffies_up_relative(HZ)); -} - static int pm_notifier(struct notifier_block *nb, unsigned long action, void *data) @@ -42,9 +29,6 @@ static int pm_notifier(struct notifier_block *nb, switch (action) { case INTEL_GT_UNPARK: i915_globals_unpark(); - queue_delayed_work(i915->wq, - &i915->gem.retire_work, - round_jiffies_up_relative(HZ)); break; case INTEL_GT_PARK: @@ -59,7 +43,7 @@ static bool switch_to_kernel_context_sync(struct intel_gt *gt) { bool result = !intel_gt_is_wedged(gt); - if (i915_gem_wait_for_idle(gt->i915, I915_GEM_IDLE_TIMEOUT) == -ETIME) { + if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) { /* XXX hide warning from gem_eio */ if (i915_modparams.reset) { dev_err(gt->i915->drm.dev, @@ -122,14 +106,12 @@ void i915_gem_suspend(struct drm_i915_private *i915) * state. Fortunately, the kernel_context is disposable and we do * not rely on its state. */ - switch_to_kernel_context_sync(&i915->gt); + intel_gt_suspend(&i915->gt); + intel_uc_suspend(&i915->gt.uc); cancel_delayed_work_sync(&i915->gt.hangcheck.work); i915_gem_drain_freed_objects(i915); - - intel_uc_suspend(&i915->gt.uc); - intel_gt_suspend(&i915->gt); } static struct drm_i915_gem_object *first_mm_object(struct list_head *list) @@ -239,8 +221,6 @@ void i915_gem_resume(struct drm_i915_private *i915) void i915_gem_init__pm(struct drm_i915_private *i915) { - INIT_DELAYED_WORK(&i915->gem.retire_work, retire_work_handler); - i915->gem.pm_notifier.notifier_call = pm_notifier; blocking_notifier_chain_register(&i915->gt.pm_notifications, &i915->gem.pm_notifier); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index f902aeee17554..2288757808aec 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -8,6 +8,7 @@ #include "gem/i915_gem_pm.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_requests.h" #include "gt/intel_reset.h" #include "i915_selftest.h" @@ -518,7 +519,7 @@ create_test_object(struct i915_address_space *vm, int err; /* Keep in GEM's good graces */ - i915_retire_requests(vm->i915); + intel_gt_retire_requests(vm->gt); size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE); size = round_down(size, DW_PER_PAGE * PAGE_SIZE); @@ -1136,7 +1137,7 @@ __sseu_finish(const char *name, igt_spinner_end(spin); if ((flags & TEST_IDLE) && ret == 0) { - ret = i915_gem_wait_for_idle(ce->engine->i915, + ret = intel_gt_wait_for_idle(ce->engine->gt, MAX_SCHEDULE_TIMEOUT); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 4ba6ed5c8313c..1cd25cfd0246f 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -573,7 +573,7 @@ static void disable_retire_worker(struct drm_i915_private *i915) { i915_gem_driver_unregister__shrinker(i915); intel_gt_pm_get(&i915->gt); - cancel_delayed_work_sync(&i915->gem.retire_work); + cancel_delayed_work_sync(&i915->gt.requests.retire_work); } static void restore_retire_worker(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 7205595369be2..53220741e49e2 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -6,6 +6,7 @@ #include "i915_drv.h" #include "intel_gt.h" #include "intel_gt_pm.h" +#include "intel_gt_requests.h" #include "intel_mocs.h" #include "intel_rc6.h" #include "intel_uncore.h" @@ -23,6 +24,7 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) intel_gt_init_hangcheck(gt); intel_gt_init_reset(gt); + intel_gt_init_requests(gt); intel_gt_pm_init_early(gt); intel_uc_init_early(>->uc); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index bdb34f03ec477..d2e80ba64d69a 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -10,6 +10,7 @@ #include "intel_engine_pm.h" #include "intel_gt.h" #include "intel_gt_pm.h" +#include "intel_gt_requests.h" #include "intel_pm.h" #include "intel_rc6.h" #include "intel_wakeref.h" @@ -49,6 +50,7 @@ static int __gt_unpark(struct intel_wakeref *wf) i915_pmu_gt_unparked(i915); intel_gt_queue_hangcheck(gt); + intel_gt_unpark_requests(gt); pm_notify(gt, INTEL_GT_UNPARK); @@ -64,6 +66,7 @@ static int __gt_park(struct intel_wakeref *wf) GEM_TRACE("\n"); pm_notify(gt, INTEL_GT_PARK); + intel_gt_park_requests(gt); i915_pmu_gt_parked(i915); if (INTEL_GEN(i915) >= 6) @@ -196,7 +199,7 @@ int intel_gt_resume(struct intel_gt *gt) static void wait_for_idle(struct intel_gt *gt) { - if (i915_gem_wait_for_idle(gt->i915, I915_GEM_IDLE_TIMEOUT) == -ETIME) { + if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) { /* * Forcibly cancel outstanding work and leave * the gpu quiet. diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c new file mode 100644 index 0000000000000..8aed89fd2cdc1 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -0,0 +1,123 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_request.h" +#include "intel_gt.h" +#include "intel_gt_pm.h" +#include "intel_gt_requests.h" +#include "intel_timeline.h" + +static void retire_requests(struct intel_timeline *tl) +{ + struct i915_request *rq, *rn; + + list_for_each_entry_safe(rq, rn, &tl->requests, link) + if (!i915_request_retire(rq)) + break; +} + +long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) +{ + struct intel_gt_timelines *timelines = >->timelines; + struct intel_timeline *tl, *tn; + unsigned long active_count = 0; + unsigned long flags; + bool interruptible; + LIST_HEAD(free); + + interruptible = true; + if (unlikely(timeout < 0)) + timeout = -timeout, interruptible = false; + + spin_lock_irqsave(&timelines->lock, flags); + list_for_each_entry_safe(tl, tn, &timelines->active_list, link) { + if (!mutex_trylock(&tl->mutex)) + continue; + + intel_timeline_get(tl); + GEM_BUG_ON(!tl->active_count); + tl->active_count++; /* pin the list element */ + spin_unlock_irqrestore(&timelines->lock, flags); + + if (timeout > 0) { + struct dma_fence *fence; + + fence = i915_active_fence_get(&tl->last_request); + if (fence) { + timeout = dma_fence_wait_timeout(fence, + true, + timeout); + dma_fence_put(fence); + } + } + + retire_requests(tl); + + spin_lock_irqsave(&timelines->lock, flags); + + /* Resume iteration after dropping lock */ + list_safe_reset_next(tl, tn, link); + if (--tl->active_count) + active_count += !!rcu_access_pointer(tl->last_request.fence); + else + list_del(&tl->link); + + mutex_unlock(&tl->mutex); + + /* Defer the final release to after the spinlock */ + if (refcount_dec_and_test(&tl->kref.refcount)) { + GEM_BUG_ON(tl->active_count); + list_add(&tl->link, &free); + } + } + spin_unlock_irqrestore(&timelines->lock, flags); + + list_for_each_entry_safe(tl, tn, &free, link) + __intel_timeline_free(&tl->kref); + + return active_count ? timeout : 0; +} + +int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout) +{ + /* If the device is asleep, we have no requests outstanding */ + if (!intel_gt_pm_is_awake(gt)) + return 0; + + while ((timeout = intel_gt_retire_requests_timeout(gt, timeout)) > 0) { + cond_resched(); + if (signal_pending(current)) + return -EINTR; + } + + return timeout; +} + +static void retire_work_handler(struct work_struct *work) +{ + struct intel_gt *gt = + container_of(work, typeof(*gt), requests.retire_work.work); + + intel_gt_retire_requests(gt); + schedule_delayed_work(>->requests.retire_work, + round_jiffies_up_relative(HZ)); +} + +void intel_gt_init_requests(struct intel_gt *gt) +{ + INIT_DELAYED_WORK(>->requests.retire_work, retire_work_handler); +} + +void intel_gt_park_requests(struct intel_gt *gt) +{ + cancel_delayed_work(>->requests.retire_work); +} + +void intel_gt_unpark_requests(struct intel_gt *gt) +{ + schedule_delayed_work(>->requests.retire_work, + round_jiffies_up_relative(HZ)); +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h new file mode 100644 index 0000000000000..bd31cbce47e0f --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h @@ -0,0 +1,24 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_GT_REQUESTS_H +#define INTEL_GT_REQUESTS_H + +struct intel_gt; + +long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout); +static inline void intel_gt_retire_requests(struct intel_gt *gt) +{ + intel_gt_retire_requests_timeout(gt, 0); +} + +int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout); + +void intel_gt_init_requests(struct intel_gt *gt); +void intel_gt_park_requests(struct intel_gt *gt); +void intel_gt_unpark_requests(struct intel_gt *gt); + +#endif /* INTEL_GT_REQUESTS_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index 7134f1319bbe9..802f516a34301 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -50,6 +50,17 @@ struct intel_gt { struct list_head hwsp_free_list; } timelines; + struct intel_gt_requests { + /** + * We leave the user IRQ off as much as possible, + * but this means that requests will finish and never + * be retired once the system goes idle. Set a timer to + * fire periodically while the ring is running. When it + * fires, go retire requests. + */ + struct delayed_work retire_work; + } requests; + struct intel_wakeref wakeref; atomic_t user_wakeref; diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index 16abfabf08c7d..d6df40cdc8a61 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -8,6 +8,7 @@ #include "intel_engine_pm.h" #include "intel_gt.h" +#include "intel_gt_requests.h" #include "../selftests/i915_random.h" #include "../i915_selftest.h" @@ -641,6 +642,7 @@ static int live_hwsp_alternate(void *arg) static int live_hwsp_wrap(void *arg) { struct drm_i915_private *i915 = arg; + struct intel_gt *gt = &i915->gt; struct intel_engine_cs *engine; struct intel_timeline *tl; enum intel_engine_id id; @@ -651,7 +653,7 @@ static int live_hwsp_wrap(void *arg) * foreign GPU references. */ - tl = intel_timeline_create(&i915->gt, NULL); + tl = intel_timeline_create(gt, NULL); if (IS_ERR(tl)) return PTR_ERR(tl); @@ -662,7 +664,7 @@ static int live_hwsp_wrap(void *arg) if (err) goto out_free; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { const u32 *hwsp_seqno[2]; struct i915_request *rq; u32 seqno[2]; @@ -734,7 +736,7 @@ static int live_hwsp_wrap(void *arg) goto out; } - i915_retire_requests(i915); /* recycle HWSP */ + intel_gt_retire_requests(gt); /* recycle HWSP */ } out: diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 5888a658e2b77..2afc41e43b6e1 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -41,6 +41,7 @@ #include "gem/i915_gem_context.h" #include "gt/intel_gt_pm.h" +#include "gt/intel_gt_requests.h" #include "gt/intel_reset.h" #include "gt/intel_rc6.h" #include "gt/uc/intel_guc_submission.h" @@ -3621,33 +3622,33 @@ static int i915_drop_caches_set(void *data, u64 val) { struct drm_i915_private *i915 = data; + struct intel_gt *gt = &i915->gt; int ret; DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n", val, val & DROP_ALL); if (val & DROP_RESET_ACTIVE && - wait_for(intel_engines_are_idle(&i915->gt), - I915_IDLE_ENGINES_TIMEOUT)) - intel_gt_set_wedged(&i915->gt); + wait_for(intel_engines_are_idle(gt), I915_IDLE_ENGINES_TIMEOUT)) + intel_gt_set_wedged(gt); if (val & DROP_RETIRE) - i915_retire_requests(i915); + intel_gt_retire_requests(gt); if (val & (DROP_IDLE | DROP_ACTIVE)) { - ret = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT); + ret = intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT); if (ret) return ret; } if (val & DROP_IDLE) { - ret = intel_gt_pm_wait_for_idle(&i915->gt); + ret = intel_gt_pm_wait_for_idle(gt); if (ret) return ret; } - if (val & DROP_RESET_ACTIVE && intel_gt_terminally_wedged(&i915->gt)) - intel_gt_handle_error(&i915->gt, ALL_ENGINES, 0, NULL); + if (val & DROP_RESET_ACTIVE && intel_gt_terminally_wedged(gt)) + intel_gt_handle_error(gt, ALL_ENGINES, 0, NULL); fs_reclaim_acquire(GFP_KERNEL); if (val & DROP_BOUND) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 44f3463ff9f17..cb63b2bd0ce88 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1710,15 +1710,6 @@ struct drm_i915_private { struct { struct notifier_block pm_notifier; - - /** - * We leave the user IRQ off as much as possible, - * but this means that requests will finish and never - * be retired once the system goes idle. Set a timer to - * fire periodically while the ring is running. When it - * fires, go retire requests. - */ - struct delayed_work retire_work; } gem; /* For i945gm vblank irq vs. C3 workaround */ @@ -2321,7 +2312,6 @@ void i915_gem_driver_register(struct drm_i915_private *i915); void i915_gem_driver_unregister(struct drm_i915_private *i915); void i915_gem_driver_remove(struct drm_i915_private *dev_priv); void i915_gem_driver_release(struct drm_i915_private *dev_priv); -int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, long timeout); void i915_gem_suspend(struct drm_i915_private *dev_priv); void i915_gem_suspend_late(struct drm_i915_private *dev_priv); void i915_gem_resume(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 12d21e5cf3ede..a1077919b16a3 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -883,23 +883,6 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915) } } -int i915_gem_wait_for_idle(struct drm_i915_private *i915, long timeout) -{ - struct intel_gt *gt = &i915->gt; - - /* If the device is asleep, we have no requests outstanding */ - if (!intel_gt_pm_is_awake(gt)) - return 0; - - while ((timeout = i915_retire_requests_timeout(i915, timeout)) > 0) { - cond_resched(); - if (signal_pending(current)) - return -EINTR; - } - - return timeout; -} - struct i915_vma * i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view, diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 0a412f6d01d73..7e62c310290f3 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -29,6 +29,7 @@ #include #include "gem/i915_gem_context.h" +#include "gt/intel_gt_requests.h" #include "i915_drv.h" #include "i915_trace.h" @@ -37,7 +38,7 @@ I915_SELFTEST_DECLARE(static struct igt_evict_ctl { bool fail_if_busy:1; } igt_evict_ctl;) -static int ggtt_flush(struct drm_i915_private *i915) +static int ggtt_flush(struct intel_gt *gt) { /* * Not everything in the GGTT is tracked via vma (otherwise we @@ -46,7 +47,7 @@ static int ggtt_flush(struct drm_i915_private *i915) * the hopes that we can then remove contexts and the like only * bound by their active reference. */ - return i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT); + return intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT); } static bool @@ -92,7 +93,6 @@ i915_gem_evict_something(struct i915_address_space *vm, u64 start, u64 end, unsigned flags) { - struct drm_i915_private *dev_priv = vm->i915; struct drm_mm_scan scan; struct list_head eviction_list; struct i915_vma *vma, *next; @@ -124,7 +124,7 @@ i915_gem_evict_something(struct i915_address_space *vm, min_size, alignment, color, start, end, mode); - i915_retire_requests(vm->i915); + intel_gt_retire_requests(vm->gt); search_again: active = NULL; @@ -197,7 +197,7 @@ i915_gem_evict_something(struct i915_address_space *vm, if (I915_SELFTEST_ONLY(igt_evict_ctl.fail_if_busy)) return -EBUSY; - ret = ggtt_flush(dev_priv); + ret = ggtt_flush(vm->gt); if (ret) return ret; @@ -270,7 +270,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, * a stray pin (preventing eviction) that can only be resolved by * retiring. */ - i915_retire_requests(vm->i915); + intel_gt_retire_requests(vm->gt); if (i915_vm_has_cache_coloring(vm)) { /* Expand search to cover neighbouring guard pages (or lack!) */ @@ -372,7 +372,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm) * switch otherwise is ineffective. */ if (i915_is_ggtt(vm)) { - ret = ggtt_flush(vm->i915); + ret = ggtt_flush(vm->gt); if (ret) return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 082fcf9085a6b..1d26634ca597b 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -38,6 +38,7 @@ #include "display/intel_frontbuffer.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_requests.h" #include "i915_drv.h" #include "i915_scatterlist.h" @@ -2529,8 +2530,8 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, if (unlikely(ggtt->do_idle_maps)) { /* XXX This does not prevent more requests being submitted! */ - if (i915_retire_requests_timeout(dev_priv, - -MAX_SCHEDULE_TIMEOUT)) { + if (intel_gt_retire_requests_timeout(ggtt->vm.gt, + -MAX_SCHEDULE_TIMEOUT)) { DRM_ERROR("Failed to wait for idle; VT'd may hang.\n"); /* Wait a bit, in hopes it avoids the hang */ udelay(10); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 52f7c4e5b6441..437f9fc6282e3 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -216,7 +216,7 @@ static void remove_from_engine(struct i915_request *rq) spin_unlock(&locked->active.lock); } -static bool i915_request_retire(struct i915_request *rq) +bool i915_request_retire(struct i915_request *rq) { if (!i915_request_completed(rq)) return false; @@ -1508,68 +1508,6 @@ long i915_request_wait(struct i915_request *rq, return timeout; } -long i915_retire_requests_timeout(struct drm_i915_private *i915, long timeout) -{ - struct intel_gt_timelines *timelines = &i915->gt.timelines; - struct intel_timeline *tl, *tn; - unsigned long active_count = 0; - unsigned long flags; - bool interruptible; - LIST_HEAD(free); - - interruptible = true; - if (timeout < 0) - timeout = -timeout, interruptible = false; - - spin_lock_irqsave(&timelines->lock, flags); - list_for_each_entry_safe(tl, tn, &timelines->active_list, link) { - if (!mutex_trylock(&tl->mutex)) - continue; - - intel_timeline_get(tl); - GEM_BUG_ON(!tl->active_count); - tl->active_count++; /* pin the list element */ - spin_unlock_irqrestore(&timelines->lock, flags); - - if (timeout > 0) { - struct dma_fence *fence; - - fence = i915_active_fence_get(&tl->last_request); - if (fence) { - timeout = dma_fence_wait_timeout(fence, - interruptible, - timeout); - dma_fence_put(fence); - } - } - - retire_requests(tl); - - spin_lock_irqsave(&timelines->lock, flags); - - /* Resume iteration after dropping lock */ - list_safe_reset_next(tl, tn, link); - if (--tl->active_count) - active_count += !!rcu_access_pointer(tl->last_request.fence); - else - list_del(&tl->link); - - mutex_unlock(&tl->mutex); - - /* Defer the final release to after the spinlock */ - if (refcount_dec_and_test(&tl->kref.refcount)) { - GEM_BUG_ON(tl->active_count); - list_add(&tl->link, &free); - } - } - spin_unlock_irqrestore(&timelines->lock, flags); - - list_for_each_entry_safe(tl, tn, &free, link) - __intel_timeline_free(&tl->kref); - - return active_count ? timeout : 0; -} - #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_request.c" #include "selftests/i915_request.c" diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 256b0715180f4..6a95242b280dc 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -250,6 +250,7 @@ struct i915_request *__i915_request_commit(struct i915_request *request); void __i915_request_queue(struct i915_request *rq, const struct i915_sched_attr *attr); +bool i915_request_retire(struct i915_request *rq); void i915_request_retire_upto(struct i915_request *rq); static inline struct i915_request * @@ -459,10 +460,4 @@ i915_request_active_timeline(struct i915_request *rq) lockdep_is_held(&rq->engine->active.lock)); } -long i915_retire_requests_timeout(struct drm_i915_private *i915, long timeout); -static inline void i915_retire_requests(struct drm_i915_private *i915) -{ - i915_retire_requests_timeout(i915, 0); -} - #endif /* I915_REQUEST_H */ diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c index ed496bd6d84fa..7b0939e3f0074 100644 --- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c @@ -4,8 +4,8 @@ * Copyright © 2018 Intel Corporation */ -#include "gem/i915_gem_context.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_requests.h" #include "i915_drv.h" #include "i915_selftest.h" @@ -14,11 +14,12 @@ int igt_flush_test(struct drm_i915_private *i915) { - int ret = intel_gt_is_wedged(&i915->gt) ? -EIO : 0; + struct intel_gt *gt = &i915->gt; + int ret = intel_gt_is_wedged(gt) ? -EIO : 0; cond_resched(); - if (i915_gem_wait_for_idle(i915, HZ / 5) == -ETIME) { + if (intel_gt_wait_for_idle(gt, HZ / 5) == -ETIME) { pr_err("%pS timed out, cancelling all further testing.\n", __builtin_return_address(0)); @@ -26,7 +27,7 @@ int igt_flush_test(struct drm_i915_private *i915) __builtin_return_address(0)); GEM_TRACE_DUMP(); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); ret = -EIO; } diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c index eae90f97df6c7..810b60100c2c8 100644 --- a/drivers/gpu/drm/i915/selftests/igt_live_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c @@ -4,7 +4,8 @@ * Copyright © 2018 Intel Corporation */ -#include "../i915_drv.h" +#include "i915_drv.h" +#include "gt/intel_gt_requests.h" #include "../i915_selftest.h" #include "igt_flush_test.h" @@ -23,7 +24,7 @@ int igt_live_test_begin(struct igt_live_test *t, t->func = func; t->name = name; - err = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT); + err = intel_gt_wait_for_idle(&i915->gt, MAX_SCHEDULE_TIMEOUT); if (err) { pr_err("%s(%s): failed to idle before, with err=%d!", func, name, err); diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 3b589bbb2c2d1..4e6cde0d48593 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -26,6 +26,7 @@ #include #include "gt/intel_gt.h" +#include "gt/intel_gt_requests.h" #include "gt/mock_engine.h" #include "mock_request.h" @@ -44,7 +45,8 @@ void mock_device_flush(struct drm_i915_private *i915) do { for_each_engine(engine, i915, id) mock_engine_flush(engine); - } while (i915_retire_requests_timeout(i915, MAX_SCHEDULE_TIMEOUT)); + } while (intel_gt_retire_requests_timeout(&i915->gt, + MAX_SCHEDULE_TIMEOUT)); } static void mock_device_release(struct drm_device *dev) @@ -98,10 +100,6 @@ static void release_dev(struct device *dev) kfree(pdev); } -static void mock_retire_work_handler(struct work_struct *work) -{ -} - static int pm_domain_resume(struct device *dev) { return pm_generic_runtime_resume(dev); @@ -181,8 +179,6 @@ struct drm_i915_private *mock_gem_device(void) mock_init_contexts(i915); - INIT_DELAYED_WORK(&i915->gem.retire_work, mock_retire_work_handler); - intel_timelines_init(i915); mutex_lock(&i915->drm.struct_mutex); From a2b4dead98ef1241498ff8769c85154995938087 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 4 Oct 2019 14:40:07 +0100 Subject: [PATCH 317/331] drm/i915: Move global activity tracking from GEM to GT As our global unpark/park keep track of the number of active users, we can simply move the accounting from the GEM layer to the base GT layer. It was placed originally inside GEM to benefit from the 100ms extra delay on idleness, but that has been eliminated and now there is no substantive difference between the layers. In moving it, we move another piece of the puzzle out from underneath struct_mutex. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-13-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_pm.c | 11 +---------- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 5 +++++ 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 9194d8464bf73..7c316d4633db8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -10,14 +10,6 @@ #include "gt/intel_gt_requests.h" #include "i915_drv.h" -#include "i915_globals.h" - -static void i915_gem_park(struct drm_i915_private *i915) -{ - i915_vma_parked(i915); - - i915_globals_park(); -} static int pm_notifier(struct notifier_block *nb, unsigned long action, @@ -28,11 +20,10 @@ static int pm_notifier(struct notifier_block *nb, switch (action) { case INTEL_GT_UNPARK: - i915_globals_unpark(); break; case INTEL_GT_PARK: - i915_gem_park(i915); + i915_vma_parked(i915); break; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index d2e80ba64d69a..b52e2ba3d0922 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -5,6 +5,7 @@ */ #include "i915_drv.h" +#include "i915_globals.h" #include "i915_params.h" #include "intel_context.h" #include "intel_engine_pm.h" @@ -27,6 +28,8 @@ static int __gt_unpark(struct intel_wakeref *wf) GEM_TRACE("\n"); + i915_globals_unpark(); + /* * It seems that the DMC likes to transition between the DC states a lot * when there are no connected displays (no active power domains) during @@ -78,6 +81,8 @@ static int __gt_park(struct intel_wakeref *wf) GEM_BUG_ON(!wakeref); intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref); + i915_globals_park(); + return 0; } From 2935ed5339c495066d901b97de384d86c55a3f6d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 4 Oct 2019 14:40:08 +0100 Subject: [PATCH 318/331] drm/i915: Remove logical HW ID With the introduction of ctx->engines[] we allow multiple logical contexts to be used on the same engine (e.g. with virtual engines). According to bspec, aach logical context requires a unique tag in order for context-switching to occur correctly between them. [Simple experiments show that it is not so easy to trick the HW into performing a lite-restore with matching logical IDs, though my memory from early Broadwell experiments do suggest that it should be generating lite-restores.] We only need to keep a unique tag for the active lifetime of the context, and for as long as we need to identify that context. The HW uses the tag to determine if it should use a lite-restore (why not the LRCA?) and passes the tag back for various status identifies. The only status we need to track is for OA, so when using perf, we assign the specific context a unique tag. v2: Calculate required number of tags to fill ELSP. Fixes: 976b55f0e1db ("drm/i915: Allow a context to define its set of engines") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111895 Signed-off-by: Chris Wilson Acked-by: Daniele Ceraolo Spurio Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-14-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 153 ------------------ drivers/gpu/drm/i915/gem/i915_gem_context.h | 15 -- .../gpu/drm/i915/gem/i915_gem_context_types.h | 18 --- .../drm/i915/gem/selftests/i915_gem_context.c | 13 +- .../gpu/drm/i915/gem/selftests/mock_context.c | 8 - drivers/gpu/drm/i915/gt/intel_context_types.h | 1 + drivers/gpu/drm/i915/gt/intel_engine_types.h | 4 +- drivers/gpu/drm/i915/gt/intel_lrc.c | 32 ++-- drivers/gpu/drm/i915/gt/intel_lrc.h | 6 + drivers/gpu/drm/i915/gvt/kvmgt.c | 17 -- drivers/gpu/drm/i915/i915_debugfs.c | 3 - drivers/gpu/drm/i915/i915_drv.h | 12 -- drivers/gpu/drm/i915/i915_gpu_error.c | 7 +- drivers/gpu/drm/i915/i915_gpu_error.h | 1 - drivers/gpu/drm/i915/i915_perf.c | 25 ++- drivers/gpu/drm/i915/i915_trace.h | 38 ++--- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/selftests/i915_vma.c | 2 +- 18 files changed, 57 insertions(+), 302 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 0ab416887fc2a..cd4f327b23bd3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -167,97 +167,6 @@ lookup_user_engine(struct i915_gem_context *ctx, return i915_gem_context_get_engine(ctx, idx); } -static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp) -{ - unsigned int max; - - lockdep_assert_held(&i915->contexts.mutex); - - if (INTEL_GEN(i915) >= 12) - max = GEN12_MAX_CONTEXT_HW_ID; - else if (INTEL_GEN(i915) >= 11) - max = GEN11_MAX_CONTEXT_HW_ID; - else if (USES_GUC_SUBMISSION(i915)) - /* - * When using GuC in proxy submission, GuC consumes the - * highest bit in the context id to indicate proxy submission. - */ - max = MAX_GUC_CONTEXT_HW_ID; - else - max = MAX_CONTEXT_HW_ID; - - return ida_simple_get(&i915->contexts.hw_ida, 0, max, gfp); -} - -static int steal_hw_id(struct drm_i915_private *i915) -{ - struct i915_gem_context *ctx, *cn; - LIST_HEAD(pinned); - int id = -ENOSPC; - - lockdep_assert_held(&i915->contexts.mutex); - - list_for_each_entry_safe(ctx, cn, - &i915->contexts.hw_id_list, hw_id_link) { - if (atomic_read(&ctx->hw_id_pin_count)) { - list_move_tail(&ctx->hw_id_link, &pinned); - continue; - } - - GEM_BUG_ON(!ctx->hw_id); /* perma-pinned kernel context */ - list_del_init(&ctx->hw_id_link); - id = ctx->hw_id; - break; - } - - /* - * Remember how far we got up on the last repossesion scan, so the - * list is kept in a "least recently scanned" order. - */ - list_splice_tail(&pinned, &i915->contexts.hw_id_list); - return id; -} - -static int assign_hw_id(struct drm_i915_private *i915, unsigned int *out) -{ - int ret; - - lockdep_assert_held(&i915->contexts.mutex); - - /* - * We prefer to steal/stall ourselves and our users over that of the - * entire system. That may be a little unfair to our users, and - * even hurt high priority clients. The choice is whether to oomkill - * something else, or steal a context id. - */ - ret = new_hw_id(i915, GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); - if (unlikely(ret < 0)) { - ret = steal_hw_id(i915); - if (ret < 0) /* once again for the correct errno code */ - ret = new_hw_id(i915, GFP_KERNEL); - if (ret < 0) - return ret; - } - - *out = ret; - return 0; -} - -static void release_hw_id(struct i915_gem_context *ctx) -{ - struct drm_i915_private *i915 = ctx->i915; - - if (list_empty(&ctx->hw_id_link)) - return; - - mutex_lock(&i915->contexts.mutex); - if (!list_empty(&ctx->hw_id_link)) { - ida_simple_remove(&i915->contexts.hw_ida, ctx->hw_id); - list_del_init(&ctx->hw_id_link); - } - mutex_unlock(&i915->contexts.mutex); -} - static void __free_engines(struct i915_gem_engines *e, unsigned int count) { while (count--) { @@ -312,8 +221,6 @@ static void i915_gem_context_free(struct i915_gem_context *ctx) lockdep_assert_held(&ctx->i915->drm.struct_mutex); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); - release_hw_id(ctx); - free_engines(rcu_access_pointer(ctx->engines)); mutex_destroy(&ctx->engines_mutex); @@ -386,12 +293,6 @@ static void context_close(struct i915_gem_context *ctx) ctx->file_priv = ERR_PTR(-EBADF); - /* - * This context will never again be assinged to HW, so we can - * reuse its ID for the next context. - */ - release_hw_id(ctx); - /* * The LUT uses the VMA as a backpointer to unref the object, * so we need to clear the LUT before we close all the VMA (inside @@ -430,7 +331,6 @@ __create_context(struct drm_i915_private *i915) RCU_INIT_POINTER(ctx->engines, e); INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); - INIT_LIST_HEAD(&ctx->hw_id_link); /* NB: Mark all slices as needing a remap so that when the context first * loads it will restore whatever remap state already exists. If there @@ -584,18 +484,11 @@ struct i915_gem_context * i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio) { struct i915_gem_context *ctx; - int err; ctx = i915_gem_create_context(i915, 0); if (IS_ERR(ctx)) return ctx; - err = i915_gem_context_pin_hw_id(ctx); - if (err) { - destroy_kernel_context(&ctx); - return ERR_PTR(err); - } - i915_gem_context_clear_bannable(ctx); ctx->sched.priority = I915_USER_PRIORITY(prio); @@ -609,12 +502,6 @@ static void init_contexts(struct drm_i915_private *i915) mutex_init(&i915->contexts.mutex); INIT_LIST_HEAD(&i915->contexts.list); - /* Using the simple ida interface, the max is limited by sizeof(int) */ - BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX); - BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > INT_MAX); - ida_init(&i915->contexts.hw_ida); - INIT_LIST_HEAD(&i915->contexts.hw_id_list); - INIT_WORK(&i915->contexts.free_work, contexts_free_worker); init_llist_head(&i915->contexts.free_list); } @@ -634,15 +521,6 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv) DRM_ERROR("Failed to create default global context\n"); return PTR_ERR(ctx); } - /* - * For easy recognisablity, we want the kernel context to be 0 and then - * all user contexts will have non-zero hw_id. Kernel contexts are - * permanently pinned, so that we never suffer a stall and can - * use them from any allocation context (e.g. for evicting other - * contexts and from inside the shrinker). - */ - GEM_BUG_ON(ctx->hw_id); - GEM_BUG_ON(!atomic_read(&ctx->hw_id_pin_count)); dev_priv->kernel_context = ctx; DRM_DEBUG_DRIVER("%s context support initialized\n", @@ -656,10 +534,6 @@ void i915_gem_contexts_fini(struct drm_i915_private *i915) lockdep_assert_held(&i915->drm.struct_mutex); destroy_kernel_context(&i915->kernel_context); - - /* Must free all deferred contexts (via flush_workqueue) first */ - GEM_BUG_ON(!list_empty(&i915->contexts.hw_id_list)); - ida_destroy(&i915->contexts.hw_ida); } static int context_idr_cleanup(int id, void *p, void *data) @@ -2316,33 +2190,6 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, return ret; } -int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx) -{ - struct drm_i915_private *i915 = ctx->i915; - int err = 0; - - mutex_lock(&i915->contexts.mutex); - - GEM_BUG_ON(i915_gem_context_is_closed(ctx)); - - if (list_empty(&ctx->hw_id_link)) { - GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count)); - - err = assign_hw_id(i915, &ctx->hw_id); - if (err) - goto out_unlock; - - list_add_tail(&ctx->hw_id_link, &i915->contexts.hw_id_list); - } - - GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == ~0u); - atomic_inc(&ctx->hw_id_pin_count); - -out_unlock: - mutex_unlock(&i915->contexts.mutex); - return err; -} - /* GEM context-engines iterator: for_each_gem_engine() */ struct intel_context * i915_gem_engines_iter_next(struct i915_gem_engines_iter *it) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index 176978608b6fe..50bc27d30c034 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -112,21 +112,6 @@ i915_gem_context_clear_user_engines(struct i915_gem_context *ctx) clear_bit(CONTEXT_USER_ENGINES, &ctx->flags); } -int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx); -static inline int i915_gem_context_pin_hw_id(struct i915_gem_context *ctx) -{ - if (atomic_inc_not_zero(&ctx->hw_id_pin_count)) - return 0; - - return __i915_gem_context_pin_hw_id(ctx); -} - -static inline void i915_gem_context_unpin_hw_id(struct i915_gem_context *ctx) -{ - GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == 0u); - atomic_dec(&ctx->hw_id_pin_count); -} - static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx) { return !ctx->file_priv; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 260d59cc3de84..87be27877e223 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -147,24 +147,6 @@ struct i915_gem_context { #define CONTEXT_FORCE_SINGLE_SUBMISSION 2 #define CONTEXT_USER_ENGINES 3 - /** - * @hw_id: - unique identifier for the context - * - * The hardware needs to uniquely identify the context for a few - * functions like fault reporting, PASID, scheduling. The - * &drm_i915_private.context_hw_ida is used to assign a unqiue - * id for the lifetime of the context. - * - * @hw_id_pin_count: - number of times this context had been pinned - * for use (should be, at most, once per engine). - * - * @hw_id_link: - all contexts with an assigned id are tracked - * for possible repossession. - */ - unsigned int hw_id; - atomic_t hw_id_pin_count; - struct list_head hw_id_link; - struct mutex mutex; struct i915_sched_attr sched; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 2288757808aec..2fb31ada2fa7a 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -660,9 +660,9 @@ static int igt_ctx_exec(void *arg) err = gpu_fill(ce, obj, dw); if (err) { - pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", + pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), - engine->name, ctx->hw_id, + engine->name, yesno(!!ctx->vm), err); intel_context_put(ce); kernel_context_close(ctx); @@ -798,9 +798,9 @@ static int igt_shared_ctx_exec(void *arg) err = gpu_fill(ce, obj, dw); if (err) { - pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", + pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), - engine->name, ctx->hw_id, + engine->name, yesno(!!ctx->vm), err); intel_context_put(ce); kernel_context_close(ctx); @@ -1382,10 +1382,9 @@ static int igt_ctx_readonly(void *arg) err = gpu_fill(ce, obj, dw); if (err) { - pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", + pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), - ce->engine->name, ctx->hw_id, - yesno(!!ctx->vm), err); + ce->engine->name, yesno(!!ctx->vm), err); i915_gem_context_unlock_engines(ctx); goto out_unlock; } diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c index be8974ccff241..0104f16b13270 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -13,7 +13,6 @@ mock_context(struct drm_i915_private *i915, { struct i915_gem_context *ctx; struct i915_gem_engines *e; - int ret; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) @@ -30,13 +29,8 @@ mock_context(struct drm_i915_private *i915, RCU_INIT_POINTER(ctx->engines, e); INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); - INIT_LIST_HEAD(&ctx->hw_id_link); mutex_init(&ctx->mutex); - ret = i915_gem_context_pin_hw_id(ctx); - if (ret < 0) - goto err_engines; - if (name) { struct i915_ppgtt *ppgtt; @@ -54,8 +48,6 @@ mock_context(struct drm_i915_private *i915, return ctx; -err_engines: - free_engines(rcu_access_pointer(ctx->engines)); err_free: kfree(ctx); return NULL; diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index bf9cedfccbf03..6959b05ae5f8d 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -58,6 +58,7 @@ struct intel_context { u32 *lrc_reg_state; u64 lrc_desc; + u32 tag; /* cookie passed to HW to track this context on submission */ unsigned int active_count; /* protected by timeline->mutex */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 943f0663837ec..6199064f332bd 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -303,10 +303,12 @@ struct intel_engine_cs { u8 uabi_class; u8 uabi_instance; + u32 uabi_capabilities; u32 context_size; u32 mmio_base; - u32 uabi_capabilities; + unsigned int context_tag; +#define NUM_CONTEXT_TAG roundup_pow_of_two(2 * EXECLIST_MAX_PORTS) struct rb_node uabi_node; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 3cfea1758fd2f..468438fb47af5 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -440,12 +440,8 @@ assert_priority_queue(const struct i915_request *prev, static u64 lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) { - struct i915_gem_context *ctx = ce->gem_context; u64 desc; - BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH))); - BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > (BIT(GEN11_SW_CTX_ID_WIDTH))); - desc = INTEL_LEGACY_32B_CONTEXT; if (i915_vm_is_4lvl(ce->vm)) desc = INTEL_LEGACY_64B_CONTEXT; @@ -463,20 +459,11 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) * anything below. */ if (INTEL_GEN(engine->i915) >= 11) { - GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH)); - desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT; - /* bits 37-47 */ - desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT; /* bits 48-53 */ - /* TODO: decide what to do with SW counter (bits 55-60) */ - desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT; /* bits 61-63 */ - } else { - GEM_BUG_ON(ctx->hw_id >= BIT(GEN8_CTX_ID_WIDTH)); - desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */ } return desc; @@ -985,6 +972,18 @@ __execlists_schedule_in(struct i915_request *rq) intel_context_get(ce); + if (ce->tag) { + /* Use a fixed tag for OA and friends */ + ce->lrc_desc |= (u64)ce->tag << 32; + } else { + /* We don't need a strict matching tag, just different values */ + ce->lrc_desc &= ~GENMASK_ULL(47, 37); + ce->lrc_desc |= + (u64)(engine->context_tag++ % NUM_CONTEXT_TAG) << + GEN11_SW_CTX_ID_SHIFT; + BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID); + } + intel_gt_pm_get(engine->gt); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); intel_engine_context_in(engine); @@ -2114,7 +2113,6 @@ static void execlists_context_unpin(struct intel_context *ce) check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE, ce->engine); - i915_gem_context_unpin_hw_id(ce->gem_context); i915_gem_object_unpin_map(ce->state->obj); intel_ring_reset(ce->ring, ce->ring->tail); } @@ -2164,18 +2162,12 @@ __execlists_context_pin(struct intel_context *ce, goto unpin_active; } - ret = i915_gem_context_pin_hw_id(ce->gem_context); - if (ret) - goto unpin_map; - ce->lrc_desc = lrc_descriptor(ce, engine); ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; __execlists_update_reg_state(ce, engine); return 0; -unpin_map: - i915_gem_object_unpin_map(ce->state->obj); unpin_active: intel_context_active_release(ce); err: diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h index 66ac616361c1f..99dc576a4e25a 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -66,6 +66,12 @@ struct intel_engine_cs; #define GEN11_CSB_READ_PTR_MASK (GEN11_CSB_PTR_MASK << 8) #define GEN11_CSB_WRITE_PTR_MASK (GEN11_CSB_PTR_MASK << 0) +#define MAX_CONTEXT_HW_ID (1<<21) /* exclusive */ +#define MAX_GUC_CONTEXT_HW_ID (1 << 20) /* exclusive */ +#define GEN11_MAX_CONTEXT_HW_ID (1<<11) /* exclusive */ +/* in Gen12 ID 0x7FF is reserved to indicate idle */ +#define GEN12_MAX_CONTEXT_HW_ID (GEN11_MAX_CONTEXT_HW_ID - 1) + enum { INTEL_CONTEXT_SCHEDULE_IN = 0, INTEL_CONTEXT_SCHEDULE_OUT, diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 343d79c1cb7e0..04a5a0d908239 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -1564,27 +1564,10 @@ vgpu_id_show(struct device *dev, struct device_attribute *attr, return sprintf(buf, "\n"); } -static ssize_t -hw_id_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct mdev_device *mdev = mdev_from_dev(dev); - - if (mdev) { - struct intel_vgpu *vgpu = (struct intel_vgpu *) - mdev_get_drvdata(mdev); - return sprintf(buf, "%u\n", - vgpu->submission.shadow[0]->gem_context->hw_id); - } - return sprintf(buf, "\n"); -} - static DEVICE_ATTR_RO(vgpu_id); -static DEVICE_ATTR_RO(hw_id); static struct attribute *intel_vgpu_attrs[] = { &dev_attr_vgpu_id.attr, - &dev_attr_hw_id.attr, NULL }; diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 2afc41e43b6e1..0e90ac608e07a 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1593,9 +1593,6 @@ static int i915_context_status(struct seq_file *m, void *unused) struct intel_context *ce; seq_puts(m, "HW context "); - if (!list_empty(&ctx->hw_id_link)) - seq_printf(m, "%x [pin %u]", ctx->hw_id, - atomic_read(&ctx->hw_id_pin_count)); if (ctx->pid) { struct task_struct *task; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index cb63b2bd0ce88..6bdcffbf1b9b5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1541,18 +1541,6 @@ struct drm_i915_private { struct list_head list; struct llist_head free_list; struct work_struct free_work; - - /* The hw wants to have a stable context identifier for the - * lifetime of the context (for OA, PASID, faults, etc). - * This is limited in execlists to 21 bits. - */ - struct ida hw_ida; -#define MAX_CONTEXT_HW_ID (1<<21) /* exclusive */ -#define MAX_GUC_CONTEXT_HW_ID (1 << 20) /* exclusive */ -#define GEN11_MAX_CONTEXT_HW_ID (1<<11) /* exclusive */ -/* in Gen12 ID 0x7FF is reserved to indicate idle */ -#define GEN12_MAX_CONTEXT_HW_ID (GEN11_MAX_CONTEXT_HW_ID - 1) - struct list_head hw_id_list; } contexts; u32 fdi_rx_config; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index a28ee754b7b48..5cf4eed5add8b 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -471,9 +471,9 @@ static void error_print_context(struct drm_i915_error_state_buf *m, const char *header, const struct drm_i915_error_context *ctx) { - err_printf(m, "%s%s[%d] hw_id %d, prio %d, guilty %d active %d\n", - header, ctx->comm, ctx->pid, ctx->hw_id, - ctx->sched_attr.priority, ctx->guilty, ctx->active); + err_printf(m, "%s%s[%d] prio %d, guilty %d active %d\n", + header, ctx->comm, ctx->pid, ctx->sched_attr.priority, + ctx->guilty, ctx->active); } static void error_print_engine(struct drm_i915_error_state_buf *m, @@ -1271,7 +1271,6 @@ static bool record_context(struct drm_i915_error_context *e, rcu_read_unlock(); } - e->hw_id = ctx->hw_id; e->sched_attr = ctx->sched; e->guilty = atomic_read(&ctx->guilty_count); e->active = atomic_read(&ctx->active_count); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 63cf387411e02..7f1cd0b1fef7b 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -119,7 +119,6 @@ struct i915_gpu_state { struct drm_i915_error_context { char comm[TASK_COMM_LEN]; pid_t pid; - u32 hw_id; int active; int guilty; struct i915_sched_attr sched_attr; diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 80055501eccb5..ecfbc37b738bb 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1283,22 +1283,15 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) } else { stream->specific_ctx_id_mask = (1U << GEN8_CTX_ID_WIDTH) - 1; - stream->specific_ctx_id = - upper_32_bits(ce->lrc_desc); - stream->specific_ctx_id &= - stream->specific_ctx_id_mask; + stream->specific_ctx_id = stream->specific_ctx_id_mask; } break; case 11: case 12: { stream->specific_ctx_id_mask = - ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32) | - ((1U << GEN11_ENGINE_INSTANCE_WIDTH) - 1) << (GEN11_ENGINE_INSTANCE_SHIFT - 32) | - ((1 << GEN11_ENGINE_CLASS_WIDTH) - 1) << (GEN11_ENGINE_CLASS_SHIFT - 32); - stream->specific_ctx_id = upper_32_bits(ce->lrc_desc); - stream->specific_ctx_id &= - stream->specific_ctx_id_mask; + ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32); + stream->specific_ctx_id = stream->specific_ctx_id_mask; break; } @@ -1306,6 +1299,8 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) MISSING_CASE(INTEL_GEN(i915)); } + ce->tag = stream->specific_ctx_id_mask; + DRM_DEBUG_DRIVER("filtering on ctx_id=0x%x ctx_id_mask=0x%x\n", stream->specific_ctx_id, stream->specific_ctx_id_mask); @@ -1324,12 +1319,14 @@ static void oa_put_render_ctx_id(struct i915_perf_stream *stream) { struct intel_context *ce; - stream->specific_ctx_id = INVALID_CTX_ID; - stream->specific_ctx_id_mask = 0; - ce = fetch_and_zero(&stream->pinned_ctx); - if (ce) + if (ce) { + ce->tag = 0; /* recomputed on next submission after parking */ intel_context_unpin(ce); + } + + stream->specific_ctx_id = INVALID_CTX_ID; + stream->specific_ctx_id_mask = 0; } static void diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 24f2944da09de..1f2cf6cfafb5d 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -665,7 +665,6 @@ TRACE_EVENT(i915_request_queue, TP_STRUCT__entry( __field(u32, dev) - __field(u32, hw_id) __field(u64, ctx) __field(u16, class) __field(u16, instance) @@ -675,7 +674,6 @@ TRACE_EVENT(i915_request_queue, TP_fast_assign( __entry->dev = rq->i915->drm.primary->index; - __entry->hw_id = rq->gem_context->hw_id; __entry->class = rq->engine->uabi_class; __entry->instance = rq->engine->uabi_instance; __entry->ctx = rq->fence.context; @@ -683,10 +681,9 @@ TRACE_EVENT(i915_request_queue, __entry->flags = flags; ), - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, flags=0x%x", + TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, flags=0x%x", __entry->dev, __entry->class, __entry->instance, - __entry->hw_id, __entry->ctx, __entry->seqno, - __entry->flags) + __entry->ctx, __entry->seqno, __entry->flags) ); DECLARE_EVENT_CLASS(i915_request, @@ -695,7 +692,6 @@ DECLARE_EVENT_CLASS(i915_request, TP_STRUCT__entry( __field(u32, dev) - __field(u32, hw_id) __field(u64, ctx) __field(u16, class) __field(u16, instance) @@ -704,16 +700,15 @@ DECLARE_EVENT_CLASS(i915_request, TP_fast_assign( __entry->dev = rq->i915->drm.primary->index; - __entry->hw_id = rq->gem_context->hw_id; __entry->class = rq->engine->uabi_class; __entry->instance = rq->engine->uabi_instance; __entry->ctx = rq->fence.context; __entry->seqno = rq->fence.seqno; ), - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u", + TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u", __entry->dev, __entry->class, __entry->instance, - __entry->hw_id, __entry->ctx, __entry->seqno) + __entry->ctx, __entry->seqno) ); DEFINE_EVENT(i915_request, i915_request_add, @@ -738,7 +733,6 @@ TRACE_EVENT(i915_request_in, TP_STRUCT__entry( __field(u32, dev) - __field(u32, hw_id) __field(u64, ctx) __field(u16, class) __field(u16, instance) @@ -749,7 +743,6 @@ TRACE_EVENT(i915_request_in, TP_fast_assign( __entry->dev = rq->i915->drm.primary->index; - __entry->hw_id = rq->gem_context->hw_id; __entry->class = rq->engine->uabi_class; __entry->instance = rq->engine->uabi_instance; __entry->ctx = rq->fence.context; @@ -758,9 +751,9 @@ TRACE_EVENT(i915_request_in, __entry->port = port; ), - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, prio=%u, port=%u", + TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, prio=%u, port=%u", __entry->dev, __entry->class, __entry->instance, - __entry->hw_id, __entry->ctx, __entry->seqno, + __entry->ctx, __entry->seqno, __entry->prio, __entry->port) ); @@ -770,7 +763,6 @@ TRACE_EVENT(i915_request_out, TP_STRUCT__entry( __field(u32, dev) - __field(u32, hw_id) __field(u64, ctx) __field(u16, class) __field(u16, instance) @@ -780,7 +772,6 @@ TRACE_EVENT(i915_request_out, TP_fast_assign( __entry->dev = rq->i915->drm.primary->index; - __entry->hw_id = rq->gem_context->hw_id; __entry->class = rq->engine->uabi_class; __entry->instance = rq->engine->uabi_instance; __entry->ctx = rq->fence.context; @@ -788,10 +779,9 @@ TRACE_EVENT(i915_request_out, __entry->completed = i915_request_completed(rq); ), - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, completed?=%u", + TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, completed?=%u", __entry->dev, __entry->class, __entry->instance, - __entry->hw_id, __entry->ctx, __entry->seqno, - __entry->completed) + __entry->ctx, __entry->seqno, __entry->completed) ); #else @@ -829,7 +819,6 @@ TRACE_EVENT(i915_request_wait_begin, TP_STRUCT__entry( __field(u32, dev) - __field(u32, hw_id) __field(u64, ctx) __field(u16, class) __field(u16, instance) @@ -845,7 +834,6 @@ TRACE_EVENT(i915_request_wait_begin, */ TP_fast_assign( __entry->dev = rq->i915->drm.primary->index; - __entry->hw_id = rq->gem_context->hw_id; __entry->class = rq->engine->uabi_class; __entry->instance = rq->engine->uabi_instance; __entry->ctx = rq->fence.context; @@ -853,9 +841,9 @@ TRACE_EVENT(i915_request_wait_begin, __entry->flags = flags; ), - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, flags=0x%x", + TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, flags=0x%x", __entry->dev, __entry->class, __entry->instance, - __entry->hw_id, __entry->ctx, __entry->seqno, + __entry->ctx, __entry->seqno, __entry->flags) ); @@ -958,19 +946,17 @@ DECLARE_EVENT_CLASS(i915_context, TP_STRUCT__entry( __field(u32, dev) __field(struct i915_gem_context *, ctx) - __field(u32, hw_id) __field(struct i915_address_space *, vm) ), TP_fast_assign( __entry->dev = ctx->i915->drm.primary->index; __entry->ctx = ctx; - __entry->hw_id = ctx->hw_id; __entry->vm = ctx->vm; ), - TP_printk("dev=%u, ctx=%p, ctx_vm=%p, hw_id=%u", - __entry->dev, __entry->ctx, __entry->vm, __entry->hw_id) + TP_printk("dev=%u, ctx=%p, ctx_vm=%p", + __entry->dev, __entry->ctx, __entry->vm) ) DEFINE_EVENT(i915_context, i915_context_create, diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index 52d2df843148e..f39f0282e78c7 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -491,8 +491,8 @@ static int igt_evict_contexts(void *arg) if (IS_ERR(rq)) { /* When full, fail_if_busy will trigger EBUSY */ if (PTR_ERR(rq) != -EBUSY) { - pr_err("Unexpected error from request alloc (ctx hw id %u, on %s): %d\n", - ctx->hw_id, engine->name, + pr_err("Unexpected error from request alloc (on %s): %d\n", + engine->name, (int)PTR_ERR(rq)); err = PTR_ERR(rq); } diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index 1c9db08f7c283..ac1ff558eb907 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -170,7 +170,7 @@ static int igt_vma_create(void *arg) } nc = 0; - for_each_prime_number(num_ctx, MAX_CONTEXT_HW_ID) { + for_each_prime_number(num_ctx, 2 * NUM_CONTEXT_TAG) { for (; nc < num_ctx; nc++) { ctx = mock_context(i915, "mock"); if (!ctx) From a4e7ccdac38ec8335d9e4e2656c1a041c77feae1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 4 Oct 2019 14:40:09 +0100 Subject: [PATCH 319/331] drm/i915: Move context management under GEM Keep track of the GEM contexts underneath i915->gem.contexts and assign them their own lock for the purposes of list management. v2: Focus on lock tracking; ctx->vm is protected by ctx->mutex v3: Correct split with removal of logical HW ID Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-15-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 177 +++++++++--------- drivers/gpu/drm/i915/gem/i915_gem_context.h | 27 ++- .../gpu/drm/i915/gem/i915_gem_context_types.h | 2 +- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 3 +- .../gpu/drm/i915/gem/selftests/huge_pages.c | 36 ++-- .../drm/i915/gem/selftests/i915_gem_context.c | 168 ++++++++--------- .../gpu/drm/i915/gem/selftests/mock_context.c | 7 +- drivers/gpu/drm/i915/gt/intel_context.c | 10 +- drivers/gpu/drm/i915/gt/selftest_context.c | 24 +-- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 39 ++-- drivers/gpu/drm/i915/gt/selftest_lrc.c | 6 +- .../gpu/drm/i915/gt/selftest_workarounds.c | 22 ++- drivers/gpu/drm/i915/gvt/scheduler.c | 24 +-- drivers/gpu/drm/i915/i915_debugfs.c | 50 +++-- drivers/gpu/drm/i915/i915_drv.c | 2 - drivers/gpu/drm/i915/i915_drv.h | 15 +- drivers/gpu/drm/i915/i915_gem.c | 10 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 4 +- drivers/gpu/drm/i915/i915_perf.c | 24 ++- drivers/gpu/drm/i915/i915_sysfs.c | 43 ++--- drivers/gpu/drm/i915/i915_trace.h | 2 +- drivers/gpu/drm/i915/selftests/i915_gem.c | 8 - .../gpu/drm/i915/selftests/i915_gem_evict.c | 3 - drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 +- drivers/gpu/drm/i915/selftests/i915_request.c | 12 +- drivers/gpu/drm/i915/selftests/i915_vma.c | 7 +- .../gpu/drm/i915/selftests/mock_gem_device.c | 6 +- 28 files changed, 394 insertions(+), 354 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index cd4f327b23bd3..5d8221c7ba83d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -218,9 +218,12 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) static void i915_gem_context_free(struct i915_gem_context *ctx) { - lockdep_assert_held(&ctx->i915->drm.struct_mutex); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); + spin_lock(&ctx->i915->gem.contexts.lock); + list_del(&ctx->link); + spin_unlock(&ctx->i915->gem.contexts.lock); + free_engines(rcu_access_pointer(ctx->engines)); mutex_destroy(&ctx->engines_mutex); @@ -230,67 +233,54 @@ static void i915_gem_context_free(struct i915_gem_context *ctx) kfree(ctx->name); put_pid(ctx->pid); - list_del(&ctx->link); mutex_destroy(&ctx->mutex); kfree_rcu(ctx, rcu); } -static void contexts_free(struct drm_i915_private *i915) +static void contexts_free_all(struct llist_node *list) { - struct llist_node *freed = llist_del_all(&i915->contexts.free_list); struct i915_gem_context *ctx, *cn; - lockdep_assert_held(&i915->drm.struct_mutex); - - llist_for_each_entry_safe(ctx, cn, freed, free_link) + llist_for_each_entry_safe(ctx, cn, list, free_link) i915_gem_context_free(ctx); } -static void contexts_free_first(struct drm_i915_private *i915) +static void contexts_flush_free(struct i915_gem_contexts *gc) { - struct i915_gem_context *ctx; - struct llist_node *freed; - - lockdep_assert_held(&i915->drm.struct_mutex); - - freed = llist_del_first(&i915->contexts.free_list); - if (!freed) - return; - - ctx = container_of(freed, typeof(*ctx), free_link); - i915_gem_context_free(ctx); + contexts_free_all(llist_del_all(&gc->free_list)); } static void contexts_free_worker(struct work_struct *work) { - struct drm_i915_private *i915 = - container_of(work, typeof(*i915), contexts.free_work); + struct i915_gem_contexts *gc = + container_of(work, typeof(*gc), free_work); - mutex_lock(&i915->drm.struct_mutex); - contexts_free(i915); - mutex_unlock(&i915->drm.struct_mutex); + contexts_flush_free(gc); } void i915_gem_context_release(struct kref *ref) { struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); - struct drm_i915_private *i915 = ctx->i915; + struct i915_gem_contexts *gc = &ctx->i915->gem.contexts; trace_i915_context_free(ctx); - if (llist_add(&ctx->free_link, &i915->contexts.free_list)) - queue_work(i915->wq, &i915->contexts.free_work); + if (llist_add(&ctx->free_link, &gc->free_list)) + schedule_work(&gc->free_work); } static void context_close(struct i915_gem_context *ctx) { - i915_gem_context_set_closed(ctx); + struct i915_address_space *vm; - if (ctx->vm) - i915_vm_close(ctx->vm); + i915_gem_context_set_closed(ctx); mutex_lock(&ctx->mutex); + vm = i915_gem_context_vm(ctx); + if (vm) + i915_vm_close(vm); + ctx->file_priv = ERR_PTR(-EBADF); /* @@ -317,7 +307,6 @@ __create_context(struct drm_i915_private *i915) return ERR_PTR(-ENOMEM); kref_init(&ctx->ref); - list_add_tail(&ctx->link, &i915->contexts.list); ctx->i915 = i915; ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL); mutex_init(&ctx->mutex); @@ -343,6 +332,10 @@ __create_context(struct drm_i915_private *i915) for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; + spin_lock(&i915->gem.contexts.lock); + list_add_tail(&ctx->link, &i915->gem.contexts.list); + spin_unlock(&i915->gem.contexts.lock); + return ctx; err_free: @@ -372,11 +365,11 @@ static void __apply_ppgtt(struct intel_context *ce, void *vm) static struct i915_address_space * __set_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm) { - struct i915_address_space *old = ctx->vm; + struct i915_address_space *old = i915_gem_context_vm(ctx); GEM_BUG_ON(old && i915_vm_is_4lvl(vm) != i915_vm_is_4lvl(old)); - ctx->vm = i915_vm_open(vm); + rcu_assign_pointer(ctx->vm, i915_vm_open(vm)); context_apply_all(ctx, __apply_ppgtt, vm); return old; @@ -385,7 +378,7 @@ __set_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm) static void __assign_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm) { - if (vm == ctx->vm) + if (vm == rcu_access_pointer(ctx->vm)) return; vm = __set_ppgtt(ctx, vm); @@ -417,27 +410,25 @@ static void __assign_timeline(struct i915_gem_context *ctx, } static struct i915_gem_context * -i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags) +i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags) { struct i915_gem_context *ctx; - lockdep_assert_held(&dev_priv->drm.struct_mutex); - if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE && - !HAS_EXECLISTS(dev_priv)) + !HAS_EXECLISTS(i915)) return ERR_PTR(-EINVAL); - /* Reap the most stale context */ - contexts_free_first(dev_priv); + /* Reap the stale contexts */ + contexts_flush_free(&i915->gem.contexts); - ctx = __create_context(dev_priv); + ctx = __create_context(i915); if (IS_ERR(ctx)) return ctx; - if (HAS_FULL_PPGTT(dev_priv)) { + if (HAS_FULL_PPGTT(i915)) { struct i915_ppgtt *ppgtt; - ppgtt = i915_ppgtt_create(dev_priv); + ppgtt = i915_ppgtt_create(i915); if (IS_ERR(ppgtt)) { DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n", PTR_ERR(ppgtt)); @@ -445,14 +436,17 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags) return ERR_CAST(ppgtt); } + mutex_lock(&ctx->mutex); __assign_ppgtt(ctx, &ppgtt->vm); + mutex_unlock(&ctx->mutex); + i915_vm_put(&ppgtt->vm); } if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) { struct intel_timeline *timeline; - timeline = intel_timeline_create(&dev_priv->gt, NULL); + timeline = intel_timeline_create(&i915->gt, NULL); if (IS_ERR(timeline)) { context_close(ctx); return ERR_CAST(timeline); @@ -497,42 +491,40 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio) return ctx; } -static void init_contexts(struct drm_i915_private *i915) +static void init_contexts(struct i915_gem_contexts *gc) { - mutex_init(&i915->contexts.mutex); - INIT_LIST_HEAD(&i915->contexts.list); + spin_lock_init(&gc->lock); + INIT_LIST_HEAD(&gc->list); - INIT_WORK(&i915->contexts.free_work, contexts_free_worker); - init_llist_head(&i915->contexts.free_list); + INIT_WORK(&gc->free_work, contexts_free_worker); + init_llist_head(&gc->free_list); } -int i915_gem_contexts_init(struct drm_i915_private *dev_priv) +int i915_gem_init_contexts(struct drm_i915_private *i915) { struct i915_gem_context *ctx; /* Reassure ourselves we are only called once */ - GEM_BUG_ON(dev_priv->kernel_context); + GEM_BUG_ON(i915->kernel_context); - init_contexts(dev_priv); + init_contexts(&i915->gem.contexts); /* lowest priority; idle task */ - ctx = i915_gem_context_create_kernel(dev_priv, I915_PRIORITY_MIN); + ctx = i915_gem_context_create_kernel(i915, I915_PRIORITY_MIN); if (IS_ERR(ctx)) { DRM_ERROR("Failed to create default global context\n"); return PTR_ERR(ctx); } - dev_priv->kernel_context = ctx; + i915->kernel_context = ctx; DRM_DEBUG_DRIVER("%s context support initialized\n", - DRIVER_CAPS(dev_priv)->has_logical_contexts ? + DRIVER_CAPS(i915)->has_logical_contexts ? "logical" : "fake"); return 0; } -void i915_gem_contexts_fini(struct drm_i915_private *i915) +void i915_gem_driver_release__contexts(struct drm_i915_private *i915) { - lockdep_assert_held(&i915->drm.struct_mutex); - destroy_kernel_context(&i915->kernel_context); } @@ -551,11 +543,16 @@ static int vm_idr_cleanup(int id, void *p, void *data) static int gem_context_register(struct i915_gem_context *ctx, struct drm_i915_file_private *fpriv) { + struct i915_address_space *vm; int ret; ctx->file_priv = fpriv; - if (ctx->vm) - ctx->vm->file = fpriv; + + mutex_lock(&ctx->mutex); + vm = i915_gem_context_vm(ctx); + if (vm) + WRITE_ONCE(vm->file, fpriv); /* XXX */ + mutex_unlock(&ctx->mutex); ctx->pid = get_task_pid(current, PIDTYPE_PID); ctx->name = kasprintf(GFP_KERNEL, "%s[%d]", @@ -592,9 +589,7 @@ int i915_gem_context_open(struct drm_i915_private *i915, idr_init(&file_priv->context_idr); idr_init_base(&file_priv->vm_idr, 1); - mutex_lock(&i915->drm.struct_mutex); ctx = i915_gem_create_context(i915, 0); - mutex_unlock(&i915->drm.struct_mutex); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto err; @@ -622,6 +617,7 @@ int i915_gem_context_open(struct drm_i915_private *i915, void i915_gem_context_close(struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; + struct drm_i915_private *i915 = file_priv->dev_priv; idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL); idr_destroy(&file_priv->context_idr); @@ -630,6 +626,8 @@ void i915_gem_context_close(struct drm_file *file) idr_for_each(&file_priv->vm_idr, vm_idr_cleanup, NULL); idr_destroy(&file_priv->vm_idr); mutex_destroy(&file_priv->vm_idr_lock); + + contexts_flush_free(&i915->gem.contexts); } int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data, @@ -808,16 +806,12 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv, struct i915_address_space *vm; int ret; - if (!ctx->vm) + if (!rcu_access_pointer(ctx->vm)) return -ENODEV; - /* XXX rcu acquire? */ - ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex); - if (ret) - return ret; - + rcu_read_lock(); vm = i915_vm_get(ctx->vm); - mutex_unlock(&ctx->i915->drm.struct_mutex); + rcu_read_unlock(); ret = mutex_lock_interruptible(&file_priv->vm_idr_lock); if (ret) @@ -926,7 +920,7 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv, if (args->size) return -EINVAL; - if (!ctx->vm) + if (!rcu_access_pointer(ctx->vm)) return -ENODEV; if (upper_32_bits(args->value)) @@ -940,17 +934,20 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv, if (!vm) return -ENOENT; - err = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex); + err = mutex_lock_interruptible(&ctx->mutex); if (err) goto out; - if (vm == ctx->vm) + if (i915_gem_context_is_closed(ctx)) { + err = -ENOENT; + goto out; + } + + if (vm == rcu_access_pointer(ctx->vm)) goto unlock; /* Teardown the existing obj:vma cache, it will have to be rebuilt. */ - mutex_lock(&ctx->mutex); lut_close(ctx); - mutex_unlock(&ctx->mutex); old = __set_ppgtt(ctx, vm); @@ -970,8 +967,7 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv, } unlock: - mutex_unlock(&ctx->i915->drm.struct_mutex); - + mutex_unlock(&ctx->mutex); out: i915_vm_put(vm); return err; @@ -1827,10 +1823,11 @@ static int clone_vm(struct i915_gem_context *dst, struct i915_gem_context *src) { struct i915_address_space *vm; + int err = 0; rcu_read_lock(); do { - vm = READ_ONCE(src->vm); + vm = rcu_dereference(src->vm); if (!vm) break; @@ -1852,7 +1849,7 @@ static int clone_vm(struct i915_gem_context *dst, * it cannot be reallocated elsewhere. */ - if (vm == READ_ONCE(src->vm)) + if (vm == rcu_access_pointer(src->vm)) break; i915_vm_put(vm); @@ -1860,11 +1857,16 @@ static int clone_vm(struct i915_gem_context *dst, rcu_read_unlock(); if (vm) { - __assign_ppgtt(dst, vm); + if (!mutex_lock_interruptible(&dst->mutex)) { + __assign_ppgtt(dst, vm); + mutex_unlock(&dst->mutex); + } else { + err = -EINTR; + } i915_vm_put(vm); } - return 0; + return err; } static int create_clone(struct i915_user_extension __user *ext, void *data) @@ -1954,12 +1956,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, return -EIO; } - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - ext_data.ctx = i915_gem_create_context(i915, args->flags); - mutex_unlock(&dev->struct_mutex); if (IS_ERR(ext_data.ctx)) return PTR_ERR(ext_data.ctx); @@ -2086,10 +2083,12 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, case I915_CONTEXT_PARAM_GTT_SIZE: args->size = 0; - if (ctx->vm) - args->value = ctx->vm->total; + rcu_read_lock(); + if (rcu_access_pointer(ctx->vm)) + args->value = rcu_dereference(ctx->vm)->total; else args->value = to_i915(dev)->ggtt.vm.total; + rcu_read_unlock(); break; case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE: @@ -2155,7 +2154,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *i915 = to_i915(dev); struct drm_i915_reset_stats *args = data; struct i915_gem_context *ctx; int ret; @@ -2177,7 +2176,7 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, */ if (capable(CAP_SYS_ADMIN)) - args->reset_count = i915_reset_count(&dev_priv->gpu_error); + args->reset_count = i915_reset_count(&i915->gpu_error); else args->reset_count = 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index 50bc27d30c034..9234586830d18 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -11,7 +11,9 @@ #include "gt/intel_context.h" +#include "i915_drv.h" #include "i915_gem.h" +#include "i915_gem_gtt.h" #include "i915_scheduler.h" #include "intel_device_info.h" @@ -118,8 +120,8 @@ static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx) } /* i915_gem_context.c */ -int __must_check i915_gem_contexts_init(struct drm_i915_private *dev_priv); -void i915_gem_contexts_fini(struct drm_i915_private *dev_priv); +int __must_check i915_gem_init_contexts(struct drm_i915_private *i915); +void i915_gem_driver_release__contexts(struct drm_i915_private *i915); int i915_gem_context_open(struct drm_i915_private *i915, struct drm_file *file); @@ -158,6 +160,27 @@ static inline void i915_gem_context_put(struct i915_gem_context *ctx) kref_put(&ctx->ref, i915_gem_context_release); } +static inline struct i915_address_space * +i915_gem_context_vm(struct i915_gem_context *ctx) +{ + return rcu_dereference_protected(ctx->vm, lockdep_is_held(&ctx->mutex)); +} + +static inline struct i915_address_space * +i915_gem_context_get_vm_rcu(struct i915_gem_context *ctx) +{ + struct i915_address_space *vm; + + rcu_read_lock(); + vm = rcu_dereference(ctx->vm); + if (!vm) + vm = &ctx->i915->ggtt.vm; + vm = i915_vm_get(vm); + rcu_read_unlock(); + + return vm; +} + static inline struct i915_gem_engines * i915_gem_context_engines(struct i915_gem_context *ctx) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 87be27877e223..ab8e1367dfc81 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -88,7 +88,7 @@ struct i915_gem_context { * In other modes, this is a NULL pointer with the expectation that * the caller uses the shared global GTT. */ - struct i915_address_space *vm; + struct i915_address_space __rcu *vm; /** * @pid: process id of creator diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 42c15e8bf1666..580c15e74a0a6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -728,7 +728,7 @@ static int eb_select_context(struct i915_execbuffer *eb) return -ENOENT; eb->gem_context = ctx; - if (ctx->vm) + if (rcu_access_pointer(ctx->vm)) eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT; eb->context_flags = 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 3798502097b41..ef5e723fd057f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -766,7 +766,8 @@ i915_gem_userptr_ioctl(struct drm_device *dev, * On almost all of the older hw, we cannot tell the GPU that * a page is readonly. */ - vm = dev_priv->kernel_context->vm; + vm = rcu_dereference_protected(dev_priv->kernel_context->vm, + true); /* static vm */ if (!vm || !vm->has_read_only) return -ENODEV; } diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 98b2a6ccfcc1a..3314858f30461 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -1322,15 +1322,15 @@ static int igt_ppgtt_pin_update(void *arg) struct i915_gem_context *ctx = arg; struct drm_i915_private *dev_priv = ctx->i915; unsigned long supported = INTEL_INFO(dev_priv)->page_sizes; - struct i915_address_space *vm = ctx->vm; struct drm_i915_gem_object *obj; struct i915_gem_engines_iter it; + struct i915_address_space *vm; struct intel_context *ce; struct i915_vma *vma; unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; unsigned int n; int first, last; - int err; + int err = 0; /* * Make sure there's no funny business when doing a PIN_UPDATE -- in the @@ -1340,9 +1340,10 @@ static int igt_ppgtt_pin_update(void *arg) * huge-gtt-pages. */ - if (!vm || !i915_vm_is_4lvl(vm)) { + vm = i915_gem_context_get_vm_rcu(ctx); + if (!i915_vm_is_4lvl(vm)) { pr_info("48b PPGTT not supported, skipping\n"); - return 0; + goto out_vm; } first = ilog2(I915_GTT_PAGE_SIZE_64K); @@ -1451,6 +1452,8 @@ static int igt_ppgtt_pin_update(void *arg) i915_vma_close(vma); out_put: i915_gem_object_put(obj); +out_vm: + i915_vm_put(vm); return err; } @@ -1460,7 +1463,7 @@ static int igt_tmpfs_fallback(void *arg) struct i915_gem_context *ctx = arg; struct drm_i915_private *i915 = ctx->i915; struct vfsmount *gemfs = i915->mm.gemfs; - struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; + struct i915_address_space *vm = i915_gem_context_get_vm_rcu(ctx); struct drm_i915_gem_object *obj; struct i915_vma *vma; u32 *vaddr; @@ -1510,6 +1513,7 @@ static int igt_tmpfs_fallback(void *arg) out_restore: i915->mm.gemfs = gemfs; + i915_vm_put(vm); return err; } @@ -1517,14 +1521,14 @@ static int igt_shrink_thp(void *arg) { struct i915_gem_context *ctx = arg; struct drm_i915_private *i915 = ctx->i915; - struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; + struct i915_address_space *vm = i915_gem_context_get_vm_rcu(ctx); struct drm_i915_gem_object *obj; struct i915_gem_engines_iter it; struct intel_context *ce; struct i915_vma *vma; unsigned int flags = PIN_USER; unsigned int n; - int err; + int err = 0; /* * Sanity check shrinking huge-paged object -- make sure nothing blows @@ -1533,12 +1537,14 @@ static int igt_shrink_thp(void *arg) if (!igt_can_allocate_thp(i915)) { pr_info("missing THP support, skipping\n"); - return 0; + goto out_vm; } obj = i915_gem_object_create_shmem(i915, SZ_2M); - if (IS_ERR(obj)) - return PTR_ERR(obj); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_vm; + } vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { @@ -1607,6 +1613,8 @@ static int igt_shrink_thp(void *arg) i915_vma_close(vma); out_put: i915_gem_object_put(obj); +out_vm: + i915_vm_put(vm); return err; } @@ -1675,6 +1683,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) }; struct drm_file *file; struct i915_gem_context *ctx; + struct i915_address_space *vm; intel_wakeref_t wakeref; int err; @@ -1699,8 +1708,11 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) goto out_unlock; } - if (ctx->vm) - ctx->vm->scrub_64K = true; + mutex_lock(&ctx->mutex); + vm = i915_gem_context_vm(ctx); + if (vm) + WRITE_ONCE(vm->scrub_64K, true); + mutex_unlock(&ctx->mutex); err = i915_subtests(tests, ctx); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 2fb31ada2fa7a..d44fa9d356f11 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -53,19 +53,17 @@ static int live_nop_switch(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL); if (!ctx) { err = -ENOMEM; - goto out_unlock; + goto out_file; } for (n = 0; n < nctx; n++) { ctx[n] = live_context(i915, file); if (IS_ERR(ctx[n])) { err = PTR_ERR(ctx[n]); - goto out_unlock; + goto out_file; } } @@ -79,7 +77,7 @@ static int live_nop_switch(void *arg) rq = igt_request_alloc(ctx[n], engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); - goto out_unlock; + goto out_file; } i915_request_add(rq); } @@ -87,7 +85,7 @@ static int live_nop_switch(void *arg) pr_err("Failed to populated %d contexts\n", nctx); intel_gt_set_wedged(&i915->gt); err = -EIO; - goto out_unlock; + goto out_file; } times[1] = ktime_get_raw(); @@ -97,7 +95,7 @@ static int live_nop_switch(void *arg) err = igt_live_test_begin(&t, i915, __func__, engine->name); if (err) - goto out_unlock; + goto out_file; end_time = jiffies + i915_selftest.timeout_jiffies; for_each_prime_number_from(prime, 2, 8192) { @@ -107,7 +105,7 @@ static int live_nop_switch(void *arg) rq = igt_request_alloc(ctx[n % nctx], engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); - goto out_unlock; + goto out_file; } /* @@ -143,7 +141,7 @@ static int live_nop_switch(void *arg) err = igt_live_test_end(&t); if (err) - goto out_unlock; + goto out_file; pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n", engine->name, @@ -151,8 +149,7 @@ static int live_nop_switch(void *arg) prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1)); } -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); +out_file: mock_file_free(i915, file); return err; } @@ -253,12 +250,10 @@ static int live_parallel_switch(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - ctx = live_context(i915, file); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); - goto out_locked; + goto out_file; } engines = i915_gem_context_lock_engines(ctx); @@ -268,7 +263,7 @@ static int live_parallel_switch(void *arg) if (!data) { i915_gem_context_unlock_engines(ctx); err = -ENOMEM; - goto out_locked; + goto out; } m = 0; /* Use the first context as our template for the engines */ @@ -276,7 +271,7 @@ static int live_parallel_switch(void *arg) err = intel_context_pin(ce); if (err) { i915_gem_context_unlock_engines(ctx); - goto out_locked; + goto out; } data[m++].ce[0] = intel_context_get(ce); } @@ -287,7 +282,7 @@ static int live_parallel_switch(void *arg) ctx = live_context(i915, file); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); - goto out_locked; + goto out; } for (m = 0; m < count; m++) { @@ -296,20 +291,18 @@ static int live_parallel_switch(void *arg) ce = intel_context_create(ctx, data[m].ce[0]->engine); if (IS_ERR(ce)) - goto out_locked; + goto out; err = intel_context_pin(ce); if (err) { intel_context_put(ce); - goto out_locked; + goto out; } data[m].ce[n] = ce; } } - mutex_unlock(&i915->drm.struct_mutex); - for (fn = func; !err && *fn; fn++) { struct igt_live_test t; int n; @@ -354,8 +347,7 @@ static int live_parallel_switch(void *arg) mutex_unlock(&i915->drm.struct_mutex); } - mutex_lock(&i915->drm.struct_mutex); -out_locked: +out: for (n = 0; n < count; n++) { for (m = 0; m < ARRAY_SIZE(data->ce); m++) { if (!data[n].ce[m]) @@ -365,8 +357,8 @@ static int live_parallel_switch(void *arg) intel_context_put(data[n].ce[m]); } } - mutex_unlock(&i915->drm.struct_mutex); kfree(data); +out_file: mock_file_free(i915, file); return err; } @@ -626,11 +618,9 @@ static int igt_ctx_exec(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - err = igt_live_test_begin(&t, i915, __func__, engine->name); if (err) - goto out_unlock; + goto out_file; ncontexts = 0; ndwords = 0; @@ -642,7 +632,7 @@ static int igt_ctx_exec(void *arg) ctx = kernel_context(i915); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); - goto out_unlock; + goto out_file; } ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); @@ -654,7 +644,7 @@ static int igt_ctx_exec(void *arg) err = PTR_ERR(obj); intel_context_put(ce); kernel_context_close(ctx); - goto out_unlock; + goto out_file; } } @@ -663,17 +653,18 @@ static int igt_ctx_exec(void *arg) pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), engine->name, - yesno(!!ctx->vm), err); + yesno(!!rcu_access_pointer(ctx->vm)), + err); intel_context_put(ce); kernel_context_close(ctx); - goto out_unlock; + goto out_file; } err = throttle(ce, tq, ARRAY_SIZE(tq)); if (err) { intel_context_put(ce); kernel_context_close(ctx); - goto out_unlock; + goto out_file; } if (++dw == max_dwords(obj)) { @@ -703,11 +694,10 @@ static int igt_ctx_exec(void *arg) dw += rem; } -out_unlock: +out_file: throttle_release(tq, ARRAY_SIZE(tq)); if (igt_live_test_end(&t)) err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); mock_file_free(i915, file); if (err) @@ -742,22 +732,20 @@ static int igt_shared_ctx_exec(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - parent = live_context(i915, file); if (IS_ERR(parent)) { err = PTR_ERR(parent); - goto out_unlock; + goto out_file; } if (!parent->vm) { /* not full-ppgtt; nothing to share */ err = 0; - goto out_unlock; + goto out_file; } err = igt_live_test_begin(&t, i915, __func__, ""); if (err) - goto out_unlock; + goto out_file; for_each_engine(engine, i915, id) { unsigned long ncontexts, ndwords, dw; @@ -781,7 +769,9 @@ static int igt_shared_ctx_exec(void *arg) goto out_test; } + mutex_lock(&ctx->mutex); __assign_ppgtt(ctx, parent->vm); + mutex_unlock(&ctx->mutex); ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); GEM_BUG_ON(IS_ERR(ce)); @@ -801,7 +791,8 @@ static int igt_shared_ctx_exec(void *arg) pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), engine->name, - yesno(!!ctx->vm), err); + yesno(!!rcu_access_pointer(ctx->vm)), + err); intel_context_put(ce); kernel_context_close(ctx); goto out_test; @@ -840,17 +831,13 @@ static int igt_shared_ctx_exec(void *arg) dw += rem; } - mutex_unlock(&i915->drm.struct_mutex); i915_gem_drain_freed_objects(i915); - mutex_lock(&i915->drm.struct_mutex); } out_test: throttle_release(tq, ARRAY_SIZE(tq)); if (igt_live_test_end(&t)) err = -EIO; -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - +out_file: mock_file_free(i915, file); return err; } @@ -1222,8 +1209,6 @@ __igt_ctx_sseu(struct drm_i915_private *i915, if (flags & TEST_RESET) igt_global_reset_lock(&i915->gt); - mutex_lock(&i915->drm.struct_mutex); - ctx = live_context(i915, file); if (IS_ERR(ctx)) { ret = PTR_ERR(ctx); @@ -1278,8 +1263,6 @@ __igt_ctx_sseu(struct drm_i915_private *i915, i915_gem_object_put(obj); out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - if (flags & TEST_RESET) igt_global_reset_unlock(&i915->gt); @@ -1339,23 +1322,24 @@ static int igt_ctx_readonly(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - err = igt_live_test_begin(&t, i915, __func__, ""); if (err) - goto out_unlock; + goto out_file; ctx = live_context(i915, file); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); - goto out_unlock; + goto out_file; } - vm = ctx->vm ?: &i915->ggtt.alias->vm; + rcu_read_lock(); + vm = rcu_dereference(ctx->vm) ?: &i915->ggtt.alias->vm; if (!vm || !vm->has_read_only) { + rcu_read_unlock(); err = 0; - goto out_unlock; + goto out_file; } + rcu_read_unlock(); ndwords = 0; dw = 0; @@ -1373,7 +1357,7 @@ static int igt_ctx_readonly(void *arg) if (IS_ERR(obj)) { err = PTR_ERR(obj); i915_gem_context_unlock_engines(ctx); - goto out_unlock; + goto out_file; } if (prandom_u32_state(&prng) & 1) @@ -1384,15 +1368,17 @@ static int igt_ctx_readonly(void *arg) if (err) { pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), - ce->engine->name, yesno(!!ctx->vm), err); + ce->engine->name, + yesno(!!rcu_access_pointer(ctx->vm)), + err); i915_gem_context_unlock_engines(ctx); - goto out_unlock; + goto out_file; } err = throttle(ce, tq, ARRAY_SIZE(tq)); if (err) { i915_gem_context_unlock_engines(ctx); - goto out_unlock; + goto out_file; } if (++dw == max_dwords(obj)) { @@ -1424,20 +1410,19 @@ static int igt_ctx_readonly(void *arg) dw += rem; } -out_unlock: +out_file: throttle_release(tq, ARRAY_SIZE(tq)); if (igt_live_test_end(&t)) err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); mock_file_free(i915, file); return err; } -static int check_scratch(struct i915_gem_context *ctx, u64 offset) +static int check_scratch(struct i915_address_space *vm, u64 offset) { struct drm_mm_node *node = - __drm_mm_interval_first(&ctx->vm->mm, + __drm_mm_interval_first(&vm->mm, offset, offset + sizeof(u32) - 1); if (!node || node->start > offset) return 0; @@ -1455,6 +1440,7 @@ static int write_to_scratch(struct i915_gem_context *ctx, { struct drm_i915_private *i915 = ctx->i915; struct drm_i915_gem_object *obj; + struct i915_address_space *vm; struct i915_request *rq; struct i915_vma *vma; u32 *cmd; @@ -1487,17 +1473,18 @@ static int write_to_scratch(struct i915_gem_context *ctx, intel_gt_chipset_flush(engine->gt); - vma = i915_vma_instance(obj, ctx->vm, NULL); + vm = i915_gem_context_get_vm_rcu(ctx); + vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); - goto err; + goto err_vm; } err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); if (err) - goto err; + goto err_vm; - err = check_scratch(ctx, offset); + err = check_scratch(vm, offset); if (err) goto err_unpin; @@ -1523,6 +1510,7 @@ static int write_to_scratch(struct i915_gem_context *ctx, i915_request_add(rq); + i915_vm_put(vm); return 0; skip_request: @@ -1531,6 +1519,8 @@ static int write_to_scratch(struct i915_gem_context *ctx, i915_request_add(rq); err_unpin: i915_vma_unpin(vma); +err_vm: + i915_vm_put(vm); err: i915_gem_object_put(obj); return err; @@ -1542,6 +1532,7 @@ static int read_from_scratch(struct i915_gem_context *ctx, { struct drm_i915_private *i915 = ctx->i915; struct drm_i915_gem_object *obj; + struct i915_address_space *vm; const u32 RCS_GPR0 = 0x2600; /* not all engines have their own GPR! */ const u32 result = 0x100; struct i915_request *rq; @@ -1586,17 +1577,18 @@ static int read_from_scratch(struct i915_gem_context *ctx, intel_gt_chipset_flush(engine->gt); - vma = i915_vma_instance(obj, ctx->vm, NULL); + vm = i915_gem_context_get_vm_rcu(ctx); + vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); - goto err; + goto err_vm; } err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); if (err) - goto err; + goto err_vm; - err = check_scratch(ctx, offset); + err = check_scratch(vm, offset); if (err) goto err_unpin; @@ -1627,12 +1619,12 @@ static int read_from_scratch(struct i915_gem_context *ctx, err = i915_gem_object_set_to_cpu_domain(obj, false); i915_gem_object_unlock(obj); if (err) - goto err; + goto err_vm; cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); - goto err; + goto err_vm; } *value = cmd[result / sizeof(*cmd)]; @@ -1647,6 +1639,8 @@ static int read_from_scratch(struct i915_gem_context *ctx, i915_request_add(rq); err_unpin: i915_vma_unpin(vma); +err_vm: + i915_vm_put(vm); err: i915_gem_object_put(obj); return err; @@ -1677,27 +1671,25 @@ static int igt_vm_isolation(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - err = igt_live_test_begin(&t, i915, __func__, ""); if (err) - goto out_unlock; + goto out_file; ctx_a = live_context(i915, file); if (IS_ERR(ctx_a)) { err = PTR_ERR(ctx_a); - goto out_unlock; + goto out_file; } ctx_b = live_context(i915, file); if (IS_ERR(ctx_b)) { err = PTR_ERR(ctx_b); - goto out_unlock; + goto out_file; } /* We can only test vm isolation, if the vm are distinct */ if (ctx_a->vm == ctx_b->vm) - goto out_unlock; + goto out_file; vm_total = ctx_a->vm->total; GEM_BUG_ON(ctx_b->vm->total != vm_total); @@ -1726,7 +1718,7 @@ static int igt_vm_isolation(void *arg) err = read_from_scratch(ctx_b, engine, offset, &value); if (err) - goto out_unlock; + goto out_file; if (value) { pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n", @@ -1735,7 +1727,7 @@ static int igt_vm_isolation(void *arg) lower_32_bits(offset), this); err = -EINVAL; - goto out_unlock; + goto out_file; } this++; @@ -1745,11 +1737,9 @@ static int igt_vm_isolation(void *arg) pr_info("Checked %lu scratch offsets across %d engines\n", count, RUNTIME_INFO(i915)->num_engines); -out_unlock: +out_file: if (igt_live_test_end(&t)) err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); - mock_file_free(i915, file); return err; } @@ -1781,13 +1771,9 @@ static int mock_context_barrier(void *arg) * a request; useful for retiring old state after loading new. */ - mutex_lock(&i915->drm.struct_mutex); - ctx = mock_context(i915, "mock"); - if (!ctx) { - err = -ENOMEM; - goto unlock; - } + if (!ctx) + return -ENOMEM; counter = 0; err = context_barrier_task(ctx, 0, @@ -1860,8 +1846,6 @@ static int mock_context_barrier(void *arg) out: mock_context_close(ctx); -unlock: - mutex_unlock(&i915->drm.struct_mutex); return err; #undef pr_fmt #define pr_fmt(x) x diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c index 0104f16b13270..74ddd682c9cd8 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -42,7 +42,10 @@ mock_context(struct drm_i915_private *i915, if (!ppgtt) goto err_put; + mutex_lock(&ctx->mutex); __set_ppgtt(ctx, &ppgtt->vm); + mutex_unlock(&ctx->mutex); + i915_vm_put(&ppgtt->vm); } @@ -65,7 +68,7 @@ void mock_context_close(struct i915_gem_context *ctx) void mock_init_contexts(struct drm_i915_private *i915) { - init_contexts(i915); + init_contexts(&i915->gem.contexts); } struct i915_gem_context * @@ -74,8 +77,6 @@ live_context(struct drm_i915_private *i915, struct drm_file *file) struct i915_gem_context *ctx; int err; - lockdep_assert_held(&i915->drm.struct_mutex); - ctx = i915_gem_create_context(i915, 0); if (IS_ERR(ctx)) return ctx; diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 35a40c2820a28..be34d97ac18f0 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -221,12 +221,20 @@ intel_context_init(struct intel_context *ce, struct i915_gem_context *ctx, struct intel_engine_cs *engine) { + struct i915_address_space *vm; + GEM_BUG_ON(!engine->cops); kref_init(&ce->ref); ce->gem_context = ctx; - ce->vm = i915_vm_get(ctx->vm ?: &engine->gt->ggtt->vm); + rcu_read_lock(); + vm = rcu_dereference(ctx->vm); + if (vm) + ce->vm = i915_vm_get(vm); + else + ce->vm = i915_vm_get(&engine->gt->ggtt->vm); + rcu_read_unlock(); if (ctx->timeline) ce->timeline = intel_timeline_get(ctx->timeline); diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c index 86cffbb0a9cb6..7c838a57e1746 100644 --- a/drivers/gpu/drm/i915/gt/selftest_context.c +++ b/drivers/gpu/drm/i915/gt/selftest_context.c @@ -155,13 +155,9 @@ static int live_context_size(void *arg) * HW tries to write past the end of one. */ - mutex_lock(>->i915->drm.struct_mutex); - fixme = kernel_context(gt->i915); - if (IS_ERR(fixme)) { - err = PTR_ERR(fixme); - goto unlock; - } + if (IS_ERR(fixme)) + return PTR_ERR(fixme); for_each_engine(engine, gt->i915, id) { struct { @@ -201,8 +197,6 @@ static int live_context_size(void *arg) } kernel_context_close(fixme); -unlock: - mutex_unlock(>->i915->drm.struct_mutex); return err; } @@ -305,12 +299,10 @@ static int live_active_context(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(>->i915->drm.struct_mutex); - fixme = live_context(gt->i915, file); if (IS_ERR(fixme)) { err = PTR_ERR(fixme); - goto unlock; + goto out_file; } for_each_engine(engine, gt->i915, id) { @@ -323,8 +315,7 @@ static int live_active_context(void *arg) break; } -unlock: - mutex_unlock(>->i915->drm.struct_mutex); +out_file: mock_file_free(gt->i915, file); return err; } @@ -418,12 +409,10 @@ static int live_remote_context(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(>->i915->drm.struct_mutex); - fixme = live_context(gt->i915, file); if (IS_ERR(fixme)) { err = PTR_ERR(fixme); - goto unlock; + goto out_file; } for_each_engine(engine, gt->i915, id) { @@ -436,8 +425,7 @@ static int live_remote_context(void *arg) break; } -unlock: - mutex_unlock(>->i915->drm.struct_mutex); +out_file: mock_file_free(gt->i915, file); return err; } diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index ffbb3d23b8873..e8a40df79bd01 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -58,9 +58,7 @@ static int hang_init(struct hang *h, struct intel_gt *gt) memset(h, 0, sizeof(*h)); h->gt = gt; - mutex_lock(>->i915->drm.struct_mutex); h->ctx = kernel_context(gt->i915); - mutex_unlock(>->i915->drm.struct_mutex); if (IS_ERR(h->ctx)) return PTR_ERR(h->ctx); @@ -133,7 +131,7 @@ static struct i915_request * hang_create_request(struct hang *h, struct intel_engine_cs *engine) { struct intel_gt *gt = h->gt; - struct i915_address_space *vm = h->ctx->vm ?: &engine->gt->ggtt->vm; + struct i915_address_space *vm = i915_gem_context_get_vm_rcu(h->ctx); struct drm_i915_gem_object *obj; struct i915_request *rq = NULL; struct i915_vma *hws, *vma; @@ -143,12 +141,15 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) int err; obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); - if (IS_ERR(obj)) + if (IS_ERR(obj)) { + i915_vm_put(vm); return ERR_CAST(obj); + } vaddr = i915_gem_object_pin_map(obj, i915_coherent_map_type(gt->i915)); if (IS_ERR(vaddr)) { i915_gem_object_put(obj); + i915_vm_put(vm); return ERR_CAST(vaddr); } @@ -159,16 +160,22 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) h->batch = vaddr; vma = i915_vma_instance(h->obj, vm, NULL); - if (IS_ERR(vma)) + if (IS_ERR(vma)) { + i915_vm_put(vm); return ERR_CAST(vma); + } hws = i915_vma_instance(h->hws, vm, NULL); - if (IS_ERR(hws)) + if (IS_ERR(hws)) { + i915_vm_put(vm); return ERR_CAST(hws); + } err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) + if (err) { + i915_vm_put(vm); return ERR_PTR(err); + } err = i915_vma_pin(hws, 0, 0, PIN_USER); if (err) @@ -266,6 +273,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) i915_vma_unpin(hws); unpin_vma: i915_vma_unpin(vma); + i915_vm_put(vm); return err ? ERR_PTR(err) : rq; } @@ -382,9 +390,7 @@ static int igt_reset_nop(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(>->i915->drm.struct_mutex); ctx = live_context(gt->i915, file); - mutex_unlock(>->i915->drm.struct_mutex); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out; @@ -458,9 +464,7 @@ static int igt_reset_nop_engine(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(>->i915->drm.struct_mutex); ctx = live_context(gt->i915, file); - mutex_unlock(>->i915->drm.struct_mutex); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out; @@ -705,9 +709,7 @@ static int active_engine(void *data) return PTR_ERR(file); for (count = 0; count < ARRAY_SIZE(ctx); count++) { - mutex_lock(&engine->i915->drm.struct_mutex); ctx[count] = live_context(engine->i915, file); - mutex_unlock(&engine->i915->drm.struct_mutex); if (IS_ERR(ctx[count])) { err = PTR_ERR(ctx[count]); while (--count) @@ -1291,6 +1293,7 @@ static int igt_reset_evict_ppgtt(void *arg) { struct intel_gt *gt = arg; struct i915_gem_context *ctx; + struct i915_address_space *vm; struct drm_file *file; int err; @@ -1298,18 +1301,20 @@ static int igt_reset_evict_ppgtt(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(>->i915->drm.struct_mutex); ctx = live_context(gt->i915, file); - mutex_unlock(>->i915->drm.struct_mutex); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out; } err = 0; - if (ctx->vm) /* aliasing == global gtt locking, covered above */ - err = __igt_reset_evict_vma(gt, ctx->vm, + vm = i915_gem_context_get_vm_rcu(ctx); + if (!i915_is_ggtt(vm)) { + /* aliasing == global gtt locking, covered above */ + err = __igt_reset_evict_vma(gt, vm, evict_vma, EXEC_OBJECT_WRITE); + } + i915_vm_put(vm); out: mock_file_free(gt->i915, file); diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 04c1cf5736426..8dc42c5c7569c 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -1631,7 +1631,11 @@ static int smoke_submit(struct preempt_smoke *smoke, int err = 0; if (batch) { - vma = i915_vma_instance(batch, ctx->vm, NULL); + struct i915_address_space *vm; + + vm = i915_gem_context_get_vm_rcu(ctx); + vma = i915_vma_instance(batch, vm, NULL); + i915_vm_put(vm); if (IS_ERR(vma)) return PTR_ERR(vma); diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index 4ee2e2babd0da..7c7aceb85a749 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -260,7 +260,6 @@ switch_to_scratch_context(struct intel_engine_cs *engine, rq = igt_spinner_create_request(spin, ce, MI_NOOP); intel_context_put(ce); - kernel_context_close(ctx); if (IS_ERR(rq)) { spin = NULL; @@ -279,6 +278,7 @@ switch_to_scratch_context(struct intel_engine_cs *engine, if (err && spin) igt_spinner_end(spin); + kernel_context_close(ctx); return err; } @@ -355,6 +355,7 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine, static struct i915_vma *create_batch(struct i915_gem_context *ctx) { struct drm_i915_gem_object *obj; + struct i915_address_space *vm; struct i915_vma *vma; int err; @@ -362,7 +363,9 @@ static struct i915_vma *create_batch(struct i915_gem_context *ctx) if (IS_ERR(obj)) return ERR_CAST(obj); - vma = i915_vma_instance(obj, ctx->vm, NULL); + vm = i915_gem_context_get_vm_rcu(ctx); + vma = i915_vma_instance(obj, vm, NULL); + i915_vm_put(vm); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err_obj; @@ -463,12 +466,15 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, 0xffff00ff, 0xffffffff, }; + struct i915_address_space *vm; struct i915_vma *scratch; struct i915_vma *batch; int err = 0, i, v; u32 *cs, *results; - scratch = create_scratch(ctx->vm, 2 * ARRAY_SIZE(values) + 1); + vm = i915_gem_context_get_vm_rcu(ctx); + scratch = create_scratch(vm, 2 * ARRAY_SIZE(values) + 1); + i915_vm_put(vm); if (IS_ERR(scratch)) return PTR_ERR(scratch); @@ -1010,6 +1016,7 @@ static int live_isolated_whitelist(void *arg) return 0; for (i = 0; i < ARRAY_SIZE(client); i++) { + struct i915_address_space *vm; struct i915_gem_context *c; c = kernel_context(i915); @@ -1018,22 +1025,27 @@ static int live_isolated_whitelist(void *arg) goto err; } - client[i].scratch[0] = create_scratch(c->vm, 1024); + vm = i915_gem_context_get_vm_rcu(c); + + client[i].scratch[0] = create_scratch(vm, 1024); if (IS_ERR(client[i].scratch[0])) { err = PTR_ERR(client[i].scratch[0]); + i915_vm_put(vm); kernel_context_close(c); goto err; } - client[i].scratch[1] = create_scratch(c->vm, 1024); + client[i].scratch[1] = create_scratch(vm, 1024); if (IS_ERR(client[i].scratch[1])) { err = PTR_ERR(client[i].scratch[1]); i915_vma_unpin_and_release(&client[i].scratch[0], 0); + i915_vm_put(vm); kernel_context_close(c); goto err; } client[i].ctx = c; + i915_vm_put(vm); } for_each_engine(engine, i915, id) { diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 7052c90d937f6..f6419f0bd4fd4 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -365,7 +365,8 @@ static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload, struct i915_gem_context *ctx) { struct intel_vgpu_mm *mm = workload->shadow_mm; - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ctx->vm); + struct i915_ppgtt *ppgtt = + i915_vm_to_ppgtt(i915_gem_context_get_vm_rcu(ctx)); int i = 0; if (mm->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) { @@ -378,6 +379,8 @@ static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload, px_dma(pd) = mm->ppgtt_mm.shadow_pdps[i]; } } + + i915_vm_put(&ppgtt->vm); } static int @@ -1213,20 +1216,18 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) struct intel_vgpu_submission *s = &vgpu->submission; struct intel_engine_cs *engine; struct i915_gem_context *ctx; + struct i915_ppgtt *ppgtt; enum intel_engine_id i; int ret; - mutex_lock(&i915->drm.struct_mutex); - ctx = i915_gem_context_create_kernel(i915, I915_PRIORITY_MAX); - if (IS_ERR(ctx)) { - ret = PTR_ERR(ctx); - goto out_unlock; - } + if (IS_ERR(ctx)) + return PTR_ERR(ctx); i915_gem_context_set_force_single_submission(ctx); - i915_context_ppgtt_root_save(s, i915_vm_to_ppgtt(ctx->vm)); + ppgtt = i915_vm_to_ppgtt(i915_gem_context_get_vm_rcu(ctx)); + i915_context_ppgtt_root_save(s, ppgtt); for_each_engine(engine, i915, i) { struct intel_context *ce; @@ -1271,12 +1272,12 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) atomic_set(&s->running_workload_num, 0); bitmap_zero(s->tlb_handle_pending, I915_NUM_ENGINES); + i915_vm_put(&ppgtt->vm); i915_gem_context_put(ctx); - mutex_unlock(&i915->drm.struct_mutex); return 0; out_shadow_ctx: - i915_context_ppgtt_root_restore(s, i915_vm_to_ppgtt(ctx->vm)); + i915_context_ppgtt_root_restore(s, ppgtt); for_each_engine(engine, i915, i) { if (IS_ERR(s->shadow[i])) break; @@ -1284,9 +1285,8 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) intel_context_unpin(s->shadow[i]); intel_context_put(s->shadow[i]); } + i915_vm_put(&ppgtt->vm); i915_gem_context_put(ctx); -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); return ret; } diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 0e90ac608e07a..b04cebc26eca3 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -316,12 +316,18 @@ static void print_context_stats(struct seq_file *m, struct drm_i915_private *i915) { struct file_stats kstats = {}; - struct i915_gem_context *ctx; + struct i915_gem_context *ctx, *cn; - list_for_each_entry(ctx, &i915->contexts.list, link) { + spin_lock(&i915->gem.contexts.lock); + list_for_each_entry_safe(ctx, cn, &i915->gem.contexts.list, link) { struct i915_gem_engines_iter it; struct intel_context *ce; + if (!kref_get_unless_zero(&ctx->ref)) + continue; + + spin_unlock(&i915->gem.contexts.lock); + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { intel_context_lock_pinned(ce); @@ -338,7 +344,9 @@ static void print_context_stats(struct seq_file *m, i915_gem_context_unlock_engines(ctx); if (!IS_ERR_OR_NULL(ctx->file_priv)) { - struct file_stats stats = { .vm = ctx->vm, }; + struct file_stats stats = { + .vm = rcu_access_pointer(ctx->vm), + }; struct drm_file *file = ctx->file_priv->file; struct task_struct *task; char name[80]; @@ -355,7 +363,12 @@ static void print_context_stats(struct seq_file *m, print_file_stats(m, name, stats); } + + spin_lock(&i915->gem.contexts.lock); + list_safe_reset_next(ctx, cn, link); + i915_gem_context_put(ctx); } + spin_unlock(&i915->gem.contexts.lock); print_file_stats(m, "[k]contexts", kstats); } @@ -363,7 +376,6 @@ static void print_context_stats(struct seq_file *m, static int i915_gem_object_info(struct seq_file *m, void *data) { struct drm_i915_private *i915 = node_to_i915(m->private); - int ret; seq_printf(m, "%u shrinkable [%u free] objects, %llu bytes\n", i915->mm.shrink_count, @@ -372,12 +384,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data) seq_putc(m, '\n'); - ret = mutex_lock_interruptible(&i915->drm.struct_mutex); - if (ret) - return ret; - print_context_stats(m, i915); - mutex_unlock(&i915->drm.struct_mutex); return 0; } @@ -1579,19 +1586,19 @@ static void describe_ctx_ring(struct seq_file *m, struct intel_ring *ring) static int i915_context_status(struct seq_file *m, void *unused) { - struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct drm_device *dev = &dev_priv->drm; - struct i915_gem_context *ctx; - int ret; - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; + struct drm_i915_private *i915 = node_to_i915(m->private); + struct i915_gem_context *ctx, *cn; - list_for_each_entry(ctx, &dev_priv->contexts.list, link) { + spin_lock(&i915->gem.contexts.lock); + list_for_each_entry_safe(ctx, cn, &i915->gem.contexts.list, link) { struct i915_gem_engines_iter it; struct intel_context *ce; + if (!kref_get_unless_zero(&ctx->ref)) + continue; + + spin_unlock(&i915->gem.contexts.lock); + seq_puts(m, "HW context "); if (ctx->pid) { struct task_struct *task; @@ -1626,9 +1633,12 @@ static int i915_context_status(struct seq_file *m, void *unused) i915_gem_context_unlock_engines(ctx); seq_putc(m, '\n'); - } - mutex_unlock(&dev->struct_mutex); + spin_lock(&i915->gem.contexts.lock); + list_safe_reset_next(ctx, cn, link); + i915_gem_context_put(ctx); + } + spin_unlock(&i915->gem.contexts.lock); return 0; } diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index b711324ca2948..5183a40d3860b 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1702,10 +1702,8 @@ static void i915_driver_postclose(struct drm_device *dev, struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; - mutex_lock(&dev->struct_mutex); i915_gem_context_close(file); i915_gem_release(dev, file); - mutex_unlock(&dev->struct_mutex); kfree_rcu(file_priv, rcu); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6bdcffbf1b9b5..35b610d523790 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1536,13 +1536,6 @@ struct drm_i915_private { int audio_power_refcount; u32 audio_freq_cntrl; - struct { - struct mutex mutex; - struct list_head list; - struct llist_head free_list; - struct work_struct free_work; - } contexts; - u32 fdi_rx_config; /* Shadow for DISPLAY_PHY_CONTROL which can't be safely read */ @@ -1698,6 +1691,14 @@ struct drm_i915_private { struct { struct notifier_block pm_notifier; + + struct i915_gem_contexts { + spinlock_t lock; /* locks list */ + struct list_head list; + + struct llist_head free_list; + struct work_struct free_work; + } contexts; } gem; /* For i945gm vblank irq vs. C3 workaround */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a1077919b16a3..b6d279987b713 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1266,7 +1266,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) goto err_unlock; } - ret = i915_gem_contexts_init(dev_priv); + ret = i915_gem_init_contexts(dev_priv); if (ret) { GEM_BUG_ON(ret == -EIO); goto err_scratch; @@ -1348,7 +1348,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) } err_context: if (ret != -EIO) - i915_gem_contexts_fini(dev_priv); + i915_gem_driver_release__contexts(dev_priv); err_scratch: intel_gt_driver_release(&dev_priv->gt); err_unlock: @@ -1416,11 +1416,9 @@ void i915_gem_driver_remove(struct drm_i915_private *dev_priv) void i915_gem_driver_release(struct drm_i915_private *dev_priv) { - mutex_lock(&dev_priv->drm.struct_mutex); intel_engines_cleanup(dev_priv); - i915_gem_contexts_fini(dev_priv); + i915_gem_driver_release__contexts(dev_priv); intel_gt_driver_release(&dev_priv->gt); - mutex_unlock(&dev_priv->drm.struct_mutex); intel_wa_list_free(&dev_priv->gt_wa_list); @@ -1430,7 +1428,7 @@ void i915_gem_driver_release(struct drm_i915_private *dev_priv) i915_gem_drain_freed_objects(dev_priv); - WARN_ON(!list_empty(&dev_priv->contexts.list)); + WARN_ON(!list_empty(&dev_priv->gem.contexts.list)); } void i915_gem_init_mmio(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 1d26634ca597b..7b15bb8919702 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1366,7 +1366,9 @@ static int gen8_init_scratch(struct i915_address_space *vm) if (vm->has_read_only && vm->i915->kernel_context && vm->i915->kernel_context->vm) { - struct i915_address_space *clone = vm->i915->kernel_context->vm; + struct i915_address_space *clone = + rcu_dereference_protected(vm->i915->kernel_context->vm, + true); /* static */ GEM_BUG_ON(!clone->has_read_only); diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index ecfbc37b738bb..231388d06c82c 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1853,8 +1853,8 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream, }; #undef ctx_flexeuN struct intel_engine_cs *engine; - struct i915_gem_context *ctx; - int i; + struct i915_gem_context *ctx, *cn; + int i, err; for (i = 2; i < ARRAY_SIZE(regs); i++) regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg); @@ -1877,16 +1877,27 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream, * context. Contexts idle at the time of reconfiguration are not * trapped behind the barrier. */ - list_for_each_entry(ctx, &i915->contexts.list, link) { - int err; - + spin_lock(&i915->gem.contexts.lock); + list_for_each_entry_safe(ctx, cn, &i915->gem.contexts.list, link) { if (ctx == i915->kernel_context) continue; + if (!kref_get_unless_zero(&ctx->ref)) + continue; + + spin_unlock(&i915->gem.contexts.lock); + err = gen8_configure_context(ctx, regs, ARRAY_SIZE(regs)); - if (err) + if (err) { + i915_gem_context_put(ctx); return err; + } + + spin_lock(&i915->gem.contexts.lock); + list_safe_reset_next(ctx, cn, link); + i915_gem_context_put(ctx); } + spin_unlock(&i915->gem.contexts.lock); /* * After updating all other contexts, we need to modify ourselves. @@ -1895,7 +1906,6 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream, */ for_each_uabi_engine(engine, i915) { struct intel_context *ce = engine->kernel_context; - int err; if (engine->class != RENDER_CLASS) continue; diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 1e08c5961535b..bf039b8ba593b 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -176,16 +176,12 @@ i915_l3_read(struct file *filp, struct kobject *kobj, count = min_t(size_t, GEN7_L3LOG_SIZE - offset, count); memset(buf, 0, count); - ret = i915_mutex_lock_interruptible(&i915->drm); - if (ret) - return ret; - + spin_lock(&i915->gem.contexts.lock); if (i915->l3_parity.remap_info[slice]) memcpy(buf, i915->l3_parity.remap_info[slice] + offset / sizeof(u32), count); - - mutex_unlock(&i915->drm.struct_mutex); + spin_unlock(&i915->gem.contexts.lock); return count; } @@ -198,8 +194,8 @@ i915_l3_write(struct file *filp, struct kobject *kobj, struct device *kdev = kobj_to_dev(kobj); struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); int slice = (int)(uintptr_t)attr->private; + u32 *remap_info, *freeme = NULL; struct i915_gem_context *ctx; - u32 **remap_info; int ret; ret = l3_access_valid(i915, offset); @@ -209,37 +205,36 @@ i915_l3_write(struct file *filp, struct kobject *kobj, if (count < sizeof(u32)) return -EINVAL; - ret = i915_mutex_lock_interruptible(&i915->drm); - if (ret) - return ret; + remap_info = kzalloc(GEN7_L3LOG_SIZE, GFP_KERNEL); + if (!remap_info) + return -ENOMEM; - remap_info = &i915->l3_parity.remap_info[slice]; - if (!*remap_info) { - *remap_info = kzalloc(GEN7_L3LOG_SIZE, GFP_KERNEL); - if (!*remap_info) { - ret = -ENOMEM; - goto out; - } + spin_lock(&i915->gem.contexts.lock); + + if (i915->l3_parity.remap_info[slice]) { + freeme = remap_info; + remap_info = i915->l3_parity.remap_info[slice]; + } else { + i915->l3_parity.remap_info[slice] = remap_info; } count = round_down(count, sizeof(u32)); - memcpy(*remap_info + offset / sizeof(u32), buf, count); + memcpy(remap_info + offset / sizeof(u32), buf, count); /* NB: We defer the remapping until we switch to the context */ - list_for_each_entry(ctx, &i915->contexts.list, link) + list_for_each_entry(ctx, &i915->gem.contexts.list, link) ctx->remap_slice |= BIT(slice); + spin_unlock(&i915->gem.contexts.lock); + kfree(freeme); + /* * TODO: Ideally we really want a GPU reset here to make sure errors * aren't propagated. Since I cannot find a stable way to reset the GPU * at this point it is left as a TODO. */ - ret = count; -out: - mutex_unlock(&i915->drm.struct_mutex); - - return ret; + return count; } static const struct bin_attribute dpf_attrs = { diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 1f2cf6cfafb5d..7ef7a1e1664c6 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -952,7 +952,7 @@ DECLARE_EVENT_CLASS(i915_context, TP_fast_assign( __entry->dev = ctx->i915->drm.primary->index; __entry->ctx = ctx; - __entry->vm = ctx->vm; + __entry->vm = rcu_access_pointer(ctx->vm); ), TP_printk("dev=%u, ctx=%p, ctx_vm=%p", diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c index 0346c3e5b6b67..bfa40a5b6d98a 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c @@ -138,11 +138,9 @@ static int igt_gem_suspend(void *arg) return PTR_ERR(file); err = -ENOMEM; - mutex_lock(&i915->drm.struct_mutex); ctx = live_context(i915, file); if (!IS_ERR(ctx)) err = switch_to_context(i915, ctx); - mutex_unlock(&i915->drm.struct_mutex); if (err) goto out; @@ -157,9 +155,7 @@ static int igt_gem_suspend(void *arg) pm_resume(i915); - mutex_lock(&i915->drm.struct_mutex); err = switch_to_context(i915, ctx); - mutex_unlock(&i915->drm.struct_mutex); out: mock_file_free(i915, file); return err; @@ -177,11 +173,9 @@ static int igt_gem_hibernate(void *arg) return PTR_ERR(file); err = -ENOMEM; - mutex_lock(&i915->drm.struct_mutex); ctx = live_context(i915, file); if (!IS_ERR(ctx)) err = switch_to_context(i915, ctx); - mutex_unlock(&i915->drm.struct_mutex); if (err) goto out; @@ -196,9 +190,7 @@ static int igt_gem_hibernate(void *arg) pm_resume(i915); - mutex_lock(&i915->drm.struct_mutex); err = switch_to_context(i915, ctx); - mutex_unlock(&i915->drm.struct_mutex); out: mock_file_free(i915, file); return err; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index f39f0282e78c7..0af9a58d011dd 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -473,7 +473,6 @@ static int igt_evict_contexts(void *arg) } count = 0; - mutex_lock(&i915->drm.struct_mutex); onstack_fence_init(&fence); do { struct i915_request *rq; @@ -510,8 +509,6 @@ static int igt_evict_contexts(void *arg) count++; err = 0; } while(1); - mutex_unlock(&i915->drm.struct_mutex); - onstack_fence_fini(&fence); pr_info("Submitted %lu contexts/requests on %s\n", count, engine->name); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index e40e6cfa51f12..8d8121c021619 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -1246,6 +1246,7 @@ static int exercise_mock(struct drm_i915_private *i915, unsigned long end_time)) { const u64 limit = totalram_pages() << PAGE_SHIFT; + struct i915_address_space *vm; struct i915_gem_context *ctx; IGT_TIMEOUT(end_time); int err; @@ -1254,7 +1255,9 @@ static int exercise_mock(struct drm_i915_private *i915, if (!ctx) return -ENOMEM; - err = func(i915, ctx->vm, 0, min(ctx->vm->total, limit), end_time); + vm = i915_gem_context_get_vm_rcu(ctx); + err = func(i915, vm, 0, min(vm->total, limit), end_time); + i915_vm_put(vm); mock_context_close(ctx); return err; @@ -1801,15 +1804,15 @@ static int igt_cs_tlb(void *arg) goto out_unlock; } - vm = ctx->vm; - if (!vm) - goto out_unlock; + vm = i915_gem_context_get_vm_rcu(ctx); + if (i915_is_ggtt(vm)) + goto out_vm; /* Create two pages; dummy we prefill the TLB, and intended */ bbe = i915_gem_object_create_internal(i915, PAGE_SIZE); if (IS_ERR(bbe)) { err = PTR_ERR(bbe); - goto out_unlock; + goto out_vm; } batch = i915_gem_object_pin_map(bbe, I915_MAP_WC); @@ -2014,6 +2017,8 @@ static int igt_cs_tlb(void *arg) i915_gem_object_put(act); out_put_bbe: i915_gem_object_put(bbe); +out_vm: + i915_vm_put(vm); out_unlock: mutex_unlock(&i915->drm.struct_mutex); mock_file_free(i915, file); diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index d7d68c6a6bd50..0897a7b049443 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -181,9 +181,7 @@ static int igt_request_rewind(void *arg) struct intel_context *ce; int err = -EINVAL; - mutex_lock(&i915->drm.struct_mutex); ctx[0] = mock_context(i915, "A"); - mutex_unlock(&i915->drm.struct_mutex); ce = i915_gem_context_get_engine(ctx[0], RCS0); GEM_BUG_ON(IS_ERR(ce)); @@ -197,9 +195,7 @@ static int igt_request_rewind(void *arg) i915_request_get(request); i915_request_add(request); - mutex_lock(&i915->drm.struct_mutex); ctx[1] = mock_context(i915, "B"); - mutex_unlock(&i915->drm.struct_mutex); ce = i915_gem_context_get_engine(ctx[1], RCS0); GEM_BUG_ON(IS_ERR(ce)); @@ -438,9 +434,7 @@ static int mock_breadcrumbs_smoketest(void *arg) } for (n = 0; n < t.ncontexts; n++) { - mutex_lock(&t.engine->i915->drm.struct_mutex); t.contexts[n] = mock_context(t.engine->i915, "mock"); - mutex_unlock(&t.engine->i915->drm.struct_mutex); if (!t.contexts[n]) { ret = -ENOMEM; goto out_contexts; @@ -734,9 +728,9 @@ static int live_empty_request(void *arg) static struct i915_vma *recursive_batch(struct drm_i915_private *i915) { struct i915_gem_context *ctx = i915->kernel_context; - struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; struct drm_i915_gem_object *obj; const int gen = INTEL_GEN(i915); + struct i915_address_space *vm; struct i915_vma *vma; u32 *cmd; int err; @@ -745,7 +739,9 @@ static struct i915_vma *recursive_batch(struct drm_i915_private *i915) if (IS_ERR(obj)) return ERR_CAST(obj); + vm = i915_gem_context_get_vm_rcu(ctx); vma = i915_vma_instance(obj, vm, NULL); + i915_vm_put(vm); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err; @@ -1220,9 +1216,7 @@ static int live_breadcrumbs_smoketest(void *arg) } for (n = 0; n < t[0].ncontexts; n++) { - mutex_lock(&i915->drm.struct_mutex); t[0].contexts[n] = live_context(i915, file); - mutex_unlock(&i915->drm.struct_mutex); if (!t[0].contexts[n]) { ret = -ENOMEM; goto out_contexts; diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index ac1ff558eb907..58b5f40a07dd6 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -24,6 +24,7 @@ #include +#include "gem/i915_gem_context.h" #include "gem/selftests/mock_context.h" #include "i915_scatterlist.h" @@ -38,7 +39,7 @@ static bool assert_vma(struct i915_vma *vma, { bool ok = true; - if (vma->vm != ctx->vm) { + if (vma->vm != rcu_access_pointer(ctx->vm)) { pr_err("VMA created with wrong VM\n"); ok = false; } @@ -113,11 +114,13 @@ static int create_vmas(struct drm_i915_private *i915, list_for_each_entry(obj, objects, st_link) { for (pinned = 0; pinned <= 1; pinned++) { list_for_each_entry(ctx, contexts, link) { - struct i915_address_space *vm = ctx->vm; + struct i915_address_space *vm; struct i915_vma *vma; int err; + vm = i915_gem_context_get_vm_rcu(ctx); vma = checked_vma_instance(obj, vm, NULL); + i915_vm_put(vm); if (IS_ERR(vma)) return PTR_ERR(vma); diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 4e6cde0d48593..335f37ba98dec 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -59,11 +59,9 @@ static void mock_device_release(struct drm_device *dev) i915_gem_drain_workqueue(i915); - mutex_lock(&i915->drm.struct_mutex); for_each_engine(engine, i915, id) mock_engine_free(engine); - i915_gem_contexts_fini(i915); - mutex_unlock(&i915->drm.struct_mutex); + i915_gem_driver_release__contexts(i915); intel_timelines_fini(i915); @@ -206,7 +204,7 @@ struct drm_i915_private *mock_gem_device(void) return i915; err_context: - i915_gem_contexts_fini(i915); + i915_gem_driver_release__contexts(i915); err_engine: mock_engine_free(i915->engine[RCS0]); err_unlock: From cb5eb072788faea188bfabcee0bfb90ecc717e82 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 4 Oct 2019 14:40:10 +0100 Subject: [PATCH 320/331] drm/i915/overlay: Drop struct_mutex guard The overlay uses the modeset mutex to control itself and only required the struct_mutex for requests, which is now obsolete. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-16-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/display/intel_display.c | 7 +------ drivers/gpu/drm/i915/display/intel_overlay.c | 13 ------------- drivers/gpu/drm/i915/gt/intel_reset.c | 4 ---- 3 files changed, 1 insertion(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 8f7365b8dffb7..f3a94a9cff163 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -5738,13 +5738,8 @@ void hsw_disable_ips(const struct intel_crtc_state *crtc_state) static void intel_crtc_dpms_overlay_disable(struct intel_crtc *intel_crtc) { - if (intel_crtc->overlay) { - struct drm_device *dev = intel_crtc->base.dev; - - mutex_lock(&dev->struct_mutex); + if (intel_crtc->overlay) (void) intel_overlay_switch_off(intel_crtc->overlay); - mutex_unlock(&dev->struct_mutex); - } /* Let userspace switch the overlay on again. In most cases userspace * has to recompute where to put it anyway. diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index e12e1a753af06..daea112cbb870 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -439,8 +439,6 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay) struct i915_request *rq; u32 *cs; - lockdep_assert_held(&dev_priv->drm.struct_mutex); - /* * Only wait if there is actually an old frame to release to * guarantee forward progress. @@ -751,7 +749,6 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, struct i915_vma *vma; int ret, tmp_width; - lockdep_assert_held(&dev_priv->drm.struct_mutex); WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex)); ret = intel_overlay_release_old_vid(overlay); @@ -852,7 +849,6 @@ int intel_overlay_switch_off(struct intel_overlay *overlay) struct drm_i915_private *dev_priv = overlay->i915; int ret; - lockdep_assert_held(&dev_priv->drm.struct_mutex); WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex)); ret = intel_overlay_recover_from_interrupt(overlay); @@ -1068,11 +1064,7 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data, if (!(params->flags & I915_OVERLAY_ENABLE)) { drm_modeset_lock_all(dev); - mutex_lock(&dev->struct_mutex); - ret = intel_overlay_switch_off(overlay); - - mutex_unlock(&dev->struct_mutex); drm_modeset_unlock_all(dev); return ret; @@ -1088,7 +1080,6 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data, return -ENOENT; drm_modeset_lock_all(dev); - mutex_lock(&dev->struct_mutex); if (i915_gem_object_is_tiled(new_bo)) { DRM_DEBUG_KMS("buffer used for overlay image can not be tiled\n"); @@ -1152,14 +1143,12 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data, if (ret != 0) goto out_unlock; - mutex_unlock(&dev->struct_mutex); drm_modeset_unlock_all(dev); i915_gem_object_put(new_bo); return 0; out_unlock: - mutex_unlock(&dev->struct_mutex); drm_modeset_unlock_all(dev); i915_gem_object_put(new_bo); @@ -1233,7 +1222,6 @@ int intel_overlay_attrs_ioctl(struct drm_device *dev, void *data, } drm_modeset_lock_all(dev); - mutex_lock(&dev->struct_mutex); ret = -EINVAL; if (!(attrs->flags & I915_OVERLAY_UPDATE_ATTRS)) { @@ -1290,7 +1278,6 @@ int intel_overlay_attrs_ioctl(struct drm_device *dev, void *data, ret = 0; out_unlock: - mutex_unlock(&dev->struct_mutex); drm_modeset_unlock_all(dev); return ret; diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 055496f0825fc..7b3d9d4517a03 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -1288,10 +1288,6 @@ int intel_gt_terminally_wedged(struct intel_gt *gt) if (!test_bit(I915_RESET_BACKOFF, >->reset.flags)) return -EIO; - /* XXX intel_reset_finish() still takes struct_mutex!!! */ - if (mutex_is_locked(>->i915->drm.struct_mutex)) - return -EAGAIN; - if (wait_event_interruptible(gt->reset.queue, !test_bit(I915_RESET_BACKOFF, >->reset.flags))) From ba198a10bfbeb1137d8c25fa569b562e8b626b05 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 4 Oct 2019 14:40:11 +0100 Subject: [PATCH 321/331] drm/i915: Drop struct_mutex guard from debugfs/framebuffer_info It protects nothing being accessed for the intel_framebuffer, so it's own locking had better be sufficient. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-17-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index b04cebc26eca3..77933b23070ef 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1534,11 +1534,6 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data) struct drm_device *dev = &dev_priv->drm; struct intel_framebuffer *fbdev_fb = NULL; struct drm_framebuffer *drm_fb; - int ret; - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; #ifdef CONFIG_DRM_FBDEV_EMULATION if (dev_priv->fbdev && dev_priv->fbdev->helper.fb) { @@ -1573,7 +1568,6 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data) seq_putc(m, '\n'); } mutex_unlock(&dev->mode_config.fb_lock); - mutex_unlock(&dev->struct_mutex); return 0; } From 6c3828ab098a8091c0fa8a24d1af8a5b779b6b4d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 4 Oct 2019 14:40:12 +0100 Subject: [PATCH 322/331] drm/i915: Remove struct_mutex guard for debugfs/opregion Having a struct_mutex around the read of a BIOS blob serves no purpose. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-18-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 77933b23070ef..298a3e879e653 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1500,21 +1500,11 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) static int i915_opregion(struct seq_file *m, void *unused) { - struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct drm_device *dev = &dev_priv->drm; - struct intel_opregion *opregion = &dev_priv->opregion; - int ret; - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - goto out; + struct intel_opregion *opregion = &node_to_i915(m->private)->opregion; if (opregion->header) seq_write(m, opregion->header, OPREGION_SIZE); - mutex_unlock(&dev->struct_mutex); - -out: return 0; } From 3d88f76dec55321d27bd37204ea9eaa44f7485b8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 4 Oct 2019 14:40:13 +0100 Subject: [PATCH 323/331] drm/i915: Drop struct_mutex from suspend state save/restore struct_mutex provides no serialisation of the registers and data structures being saved and restored across suspend/resume. It is completely superfluous here. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-19-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_suspend.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c index 2b2086def0f11..8812cdd9007f4 100644 --- a/drivers/gpu/drm/i915/i915_suspend.c +++ b/drivers/gpu/drm/i915/i915_suspend.c @@ -66,8 +66,6 @@ int i915_save_state(struct drm_i915_private *dev_priv) struct pci_dev *pdev = dev_priv->drm.pdev; int i; - mutex_lock(&dev_priv->drm.struct_mutex); - i915_save_display(dev_priv); if (IS_GEN(dev_priv, 4)) @@ -101,8 +99,6 @@ int i915_save_state(struct drm_i915_private *dev_priv) dev_priv->regfile.saveSWF3[i] = I915_READ(SWF3(i)); } - mutex_unlock(&dev_priv->drm.struct_mutex); - return 0; } @@ -111,8 +107,6 @@ int i915_restore_state(struct drm_i915_private *dev_priv) struct pci_dev *pdev = dev_priv->drm.pdev; int i; - mutex_lock(&dev_priv->drm.struct_mutex); - if (IS_GEN(dev_priv, 4)) pci_write_config_word(pdev, GCDGMBUS, dev_priv->regfile.saveGCDGMBUS); @@ -146,8 +140,6 @@ int i915_restore_state(struct drm_i915_private *dev_priv) I915_WRITE(SWF3(i), dev_priv->regfile.saveSWF3[i]); } - mutex_unlock(&dev_priv->drm.struct_mutex); - intel_gmbus_reset(dev_priv); return 0; From 2af402982ab38223356250d81741ae041c831a79 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 4 Oct 2019 14:40:14 +0100 Subject: [PATCH 324/331] drm/i915/selftests: Drop vestigal struct_mutex guards We no longer need struct_mutex to serialise request emission, so remove it from the gt selftests. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-20-chris@chris-wilson.co.uk --- .../gpu/drm/i915/gem/selftests/huge_pages.c | 15 +- .../drm/i915/gem/selftests/i915_gem_context.c | 4 - .../drm/i915/gem/selftests/i915_gem_mman.c | 2 - .../drm/i915/gem/selftests/i915_gem_phys.c | 2 - drivers/gpu/drm/i915/gt/selftest_lrc.c | 148 +++--------------- .../gpu/drm/i915/gt/selftest_workarounds.c | 11 +- drivers/gpu/drm/i915/gt/uc/selftest_guc.c | 4 - drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 2 - 8 files changed, 27 insertions(+), 161 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 3314858f30461..e42abddd4a36c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -1639,7 +1639,6 @@ int i915_gem_huge_page_mock_selftests(void) mkwrite_device_info(dev_priv)->ppgtt_type = INTEL_PPGTT_FULL; mkwrite_device_info(dev_priv)->ppgtt_size = 48; - mutex_lock(&dev_priv->drm.struct_mutex); ppgtt = i915_ppgtt_create(dev_priv); if (IS_ERR(ppgtt)) { err = PTR_ERR(ppgtt); @@ -1665,9 +1664,7 @@ int i915_gem_huge_page_mock_selftests(void) i915_vm_put(&ppgtt->vm); out_unlock: - mutex_unlock(&dev_priv->drm.struct_mutex); drm_dev_put(&dev_priv->drm); - return err; } @@ -1684,7 +1681,6 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) struct drm_file *file; struct i915_gem_context *ctx; struct i915_address_space *vm; - intel_wakeref_t wakeref; int err; if (!HAS_PPGTT(i915)) { @@ -1699,13 +1695,10 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - ctx = live_context(i915, file); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); - goto out_unlock; + goto out_file; } mutex_lock(&ctx->mutex); @@ -1716,11 +1709,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) err = i915_subtests(tests, ctx); -out_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - +out_file: mock_file_free(i915, file); - return err; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index d44fa9d356f11..fb58c0919ea1c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -307,9 +307,7 @@ static int live_parallel_switch(void *arg) struct igt_live_test t; int n; - mutex_lock(&i915->drm.struct_mutex); err = igt_live_test_begin(&t, i915, __func__, ""); - mutex_unlock(&i915->drm.struct_mutex); if (err) break; @@ -341,10 +339,8 @@ static int live_parallel_switch(void *arg) data[n].tsk = NULL; } - mutex_lock(&i915->drm.struct_mutex); if (igt_live_test_end(&t)) err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); } out: diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 1cd25cfd0246f..cfa52c525691c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -669,9 +669,7 @@ static int igt_mmap_offset_exhaustion(void *arg) goto out; } - mutex_lock(&i915->drm.struct_mutex); err = make_obj_busy(obj); - mutex_unlock(&i915->drm.struct_mutex); if (err) { pr_err("[loop %d] Failed to busy the object\n", loop); goto err_obj; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c index 94a15e3f6db81..34932871b3a59 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c @@ -25,9 +25,7 @@ static int mock_phys_object(void *arg) goto out; } - mutex_lock(&i915->drm.struct_mutex); err = i915_gem_object_attach_phys(obj, PAGE_SIZE); - mutex_unlock(&i915->drm.struct_mutex); if (err) { pr_err("i915_gem_object_attach_phys failed, err=%d\n", err); goto out_obj; diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 8dc42c5c7569c..393ae5321e1dc 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -26,17 +26,13 @@ static int live_sanitycheck(void *arg) struct i915_gem_context *ctx; struct intel_context *ce; struct igt_spinner spin; - intel_wakeref_t wakeref; int err = -ENOMEM; if (!HAS_LOGICAL_RING_CONTEXTS(i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - if (igt_spinner_init(&spin, &i915->gt)) - goto err_unlock; + return -ENOMEM; ctx = kernel_context(i915); if (!ctx) @@ -73,9 +69,6 @@ static int live_sanitycheck(void *arg) kernel_context_close(ctx); err_spin: igt_spinner_fini(&spin); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -402,7 +395,6 @@ static int live_timeslice_preempt(void *arg) { struct drm_i915_private *i915 = arg; struct drm_i915_gem_object *obj; - intel_wakeref_t wakeref; struct i915_vma *vma; void *vaddr; int err = 0; @@ -417,14 +409,9 @@ static int live_timeslice_preempt(void *arg) * ready task. */ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto err_unlock; - } + if (IS_ERR(obj)) + return PTR_ERR(obj); vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); if (IS_ERR(vma)) { @@ -469,10 +456,6 @@ static int live_timeslice_preempt(void *arg) i915_gem_object_unpin_map(obj); err_obj: i915_gem_object_put(obj); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - return err; } @@ -484,7 +467,6 @@ static int live_busywait_preempt(void *arg) struct drm_i915_gem_object *obj; struct i915_vma *vma; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = -ENOMEM; u32 *map; @@ -493,12 +475,9 @@ static int live_busywait_preempt(void *arg) * preempt the busywaits used to synchronise between rings. */ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - ctx_hi = kernel_context(i915); if (!ctx_hi) - goto err_unlock; + return -ENOMEM; ctx_hi->sched.priority = I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); @@ -652,9 +631,6 @@ static int live_busywait_preempt(void *arg) kernel_context_close(ctx_lo); err_ctx_hi: kernel_context_close(ctx_hi); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -683,7 +659,6 @@ static int live_preempt(void *arg) struct igt_spinner spin_hi, spin_lo; struct intel_engine_cs *engine; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = -ENOMEM; if (!HAS_LOGICAL_RING_PREEMPTION(i915)) @@ -692,11 +667,8 @@ static int live_preempt(void *arg) if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION)) pr_err("Logical preemption supported, but not exposed\n"); - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - if (igt_spinner_init(&spin_hi, &i915->gt)) - goto err_unlock; + return -ENOMEM; if (igt_spinner_init(&spin_lo, &i915->gt)) goto err_spin_hi; @@ -776,9 +748,6 @@ static int live_preempt(void *arg) igt_spinner_fini(&spin_lo); err_spin_hi: igt_spinner_fini(&spin_hi); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -790,17 +759,13 @@ static int live_late_preempt(void *arg) struct intel_engine_cs *engine; struct i915_sched_attr attr = {}; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = -ENOMEM; if (!HAS_LOGICAL_RING_PREEMPTION(i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - if (igt_spinner_init(&spin_hi, &i915->gt)) - goto err_unlock; + return -ENOMEM; if (igt_spinner_init(&spin_lo, &i915->gt)) goto err_spin_hi; @@ -882,9 +847,6 @@ static int live_late_preempt(void *arg) igt_spinner_fini(&spin_lo); err_spin_hi: igt_spinner_fini(&spin_hi); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; err_wedged: @@ -929,7 +891,6 @@ static int live_nopreempt(void *arg) struct intel_engine_cs *engine; struct preempt_client a, b; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = -ENOMEM; /* @@ -940,11 +901,8 @@ static int live_nopreempt(void *arg) if (!HAS_LOGICAL_RING_PREEMPTION(i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - if (preempt_client_init(i915, &a)) - goto err_unlock; + return -ENOMEM; if (preempt_client_init(i915, &b)) goto err_client_a; b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX); @@ -1018,9 +976,6 @@ static int live_nopreempt(void *arg) preempt_client_fini(&b); err_client_a: preempt_client_fini(&a); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; err_wedged: @@ -1040,7 +995,6 @@ static int live_suppress_self_preempt(void *arg) }; struct preempt_client a, b; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = -ENOMEM; /* @@ -1059,11 +1013,8 @@ static int live_suppress_self_preempt(void *arg) if (intel_vgpu_active(i915)) return 0; /* GVT forces single port & request submission */ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - if (preempt_client_init(i915, &a)) - goto err_unlock; + return -ENOMEM; if (preempt_client_init(i915, &b)) goto err_client_a; @@ -1144,9 +1095,6 @@ static int live_suppress_self_preempt(void *arg) preempt_client_fini(&b); err_client_a: preempt_client_fini(&a); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; err_wedged: @@ -1216,7 +1164,6 @@ static int live_suppress_wait_preempt(void *arg) struct preempt_client client[4]; struct intel_engine_cs *engine; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = -ENOMEM; int i; @@ -1229,11 +1176,8 @@ static int live_suppress_wait_preempt(void *arg) if (!HAS_LOGICAL_RING_PREEMPTION(i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - if (preempt_client_init(i915, &client[0])) /* ELSP[0] */ - goto err_unlock; + return -ENOMEM; if (preempt_client_init(i915, &client[1])) /* ELSP[1] */ goto err_client_0; if (preempt_client_init(i915, &client[2])) /* head of queue */ @@ -1319,9 +1263,6 @@ static int live_suppress_wait_preempt(void *arg) preempt_client_fini(&client[1]); err_client_0: preempt_client_fini(&client[0]); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; err_wedged: @@ -1338,7 +1279,6 @@ static int live_chain_preempt(void *arg) struct intel_engine_cs *engine; struct preempt_client hi, lo; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = -ENOMEM; /* @@ -1350,11 +1290,8 @@ static int live_chain_preempt(void *arg) if (!HAS_LOGICAL_RING_PREEMPTION(i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - if (preempt_client_init(i915, &hi)) - goto err_unlock; + return -ENOMEM; if (preempt_client_init(i915, &lo)) goto err_client_hi; @@ -1465,9 +1402,6 @@ static int live_chain_preempt(void *arg) preempt_client_fini(&lo); err_client_hi: preempt_client_fini(&hi); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; err_wedged: @@ -1485,7 +1419,6 @@ static int live_preempt_hang(void *arg) struct igt_spinner spin_hi, spin_lo; struct intel_engine_cs *engine; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = -ENOMEM; if (!HAS_LOGICAL_RING_PREEMPTION(i915)) @@ -1494,11 +1427,8 @@ static int live_preempt_hang(void *arg) if (!intel_has_reset_engine(&i915->gt)) return 0; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - if (igt_spinner_init(&spin_hi, &i915->gt)) - goto err_unlock; + return -ENOMEM; if (igt_spinner_init(&spin_lo, &i915->gt)) goto err_spin_hi; @@ -1590,9 +1520,6 @@ static int live_preempt_hang(void *arg) igt_spinner_fini(&spin_lo); err_spin_hi: igt_spinner_fini(&spin_hi); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -1684,11 +1611,9 @@ static int smoke_crescendo_thread(void *arg) struct i915_gem_context *ctx = smoke_context(smoke); int err; - mutex_lock(&smoke->i915->drm.struct_mutex); err = smoke_submit(smoke, ctx, count % I915_PRIORITY_MAX, smoke->batch); - mutex_unlock(&smoke->i915->drm.struct_mutex); if (err) return err; @@ -1709,8 +1634,6 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) unsigned long count; int err = 0; - mutex_unlock(&smoke->i915->drm.struct_mutex); - for_each_engine(engine, smoke->i915, id) { arg[id] = *smoke; arg[id].engine = engine; @@ -1743,8 +1666,6 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) put_task_struct(tsk[id]); } - mutex_lock(&smoke->i915->drm.struct_mutex); - pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n", count, flags, RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext); @@ -1787,7 +1708,6 @@ static int live_preempt_smoke(void *arg) .ncontext = 1024, }; const unsigned int phase[] = { 0, BATCH }; - intel_wakeref_t wakeref; struct igt_live_test t; int err = -ENOMEM; u32 *cs; @@ -1802,13 +1722,10 @@ static int live_preempt_smoke(void *arg) if (!smoke.contexts) return -ENOMEM; - mutex_lock(&smoke.i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&smoke.i915->runtime_pm); - smoke.batch = i915_gem_object_create_internal(smoke.i915, PAGE_SIZE); if (IS_ERR(smoke.batch)) { err = PTR_ERR(smoke.batch); - goto err_unlock; + goto err_free; } cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB); @@ -1855,9 +1772,7 @@ static int live_preempt_smoke(void *arg) err_batch: i915_gem_object_put(smoke.batch); -err_unlock: - intel_runtime_pm_put(&smoke.i915->runtime_pm, wakeref); - mutex_unlock(&smoke.i915->drm.struct_mutex); +err_free: kfree(smoke.contexts); return err; @@ -1995,19 +1910,17 @@ static int live_virtual_engine(void *arg) struct intel_gt *gt = &i915->gt; enum intel_engine_id id; unsigned int class, inst; - int err = -ENODEV; + int err; if (USES_GUC_SUBMISSION(i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); - for_each_engine(engine, i915, id) { err = nop_virtual_engine(i915, &engine, 1, 1, 0); if (err) { pr_err("Failed to wrap engine %s: err=%d\n", engine->name, err); - goto out_unlock; + return err; } } @@ -2028,17 +1941,15 @@ static int live_virtual_engine(void *arg) err = nop_virtual_engine(i915, siblings, nsibling, n, 0); if (err) - goto out_unlock; + return err; } err = nop_virtual_engine(i915, siblings, nsibling, n, CHAIN); if (err) - goto out_unlock; + return err; } -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - return err; + return 0; } static int mask_virtual_engine(struct drm_i915_private *i915, @@ -2117,9 +2028,6 @@ static int mask_virtual_engine(struct drm_i915_private *i915, } err = igt_live_test_end(&t); - if (err) - goto out; - out: if (igt_flush_test(i915)) err = -EIO; @@ -2142,13 +2050,11 @@ static int live_virtual_mask(void *arg) struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; struct intel_gt *gt = &i915->gt; unsigned int class, inst; - int err = 0; + int err; if (USES_GUC_SUBMISSION(i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); - for (class = 0; class <= MAX_ENGINE_CLASS; class++) { unsigned int nsibling; @@ -2164,12 +2070,10 @@ static int live_virtual_mask(void *arg) err = mask_virtual_engine(i915, siblings, nsibling); if (err) - goto out_unlock; + return err; } -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - return err; + return 0; } static int bond_virtual_engine(struct drm_i915_private *i915, @@ -2320,13 +2224,11 @@ static int live_virtual_bond(void *arg) struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; struct intel_gt *gt = &i915->gt; unsigned int class, inst; - int err = 0; + int err; if (USES_GUC_SUBMISSION(i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); - for (class = 0; class <= MAX_ENGINE_CLASS; class++) { const struct phase *p; int nsibling; @@ -2349,14 +2251,12 @@ static int live_virtual_bond(void *arg) if (err) { pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n", __func__, p->name, class, nsibling, err); - goto out_unlock; + return err; } } } -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - return err; + return 0; } int intel_execlists_live_selftests(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index 7c7aceb85a749..95627e80f2463 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -708,9 +708,7 @@ static int live_dirty_whitelist(void *arg) wakeref = intel_runtime_pm_get(&i915->runtime_pm); - mutex_unlock(&i915->drm.struct_mutex); file = mock_file(i915); - mutex_lock(&i915->drm.struct_mutex); if (IS_ERR(file)) { err = PTR_ERR(file); goto out_rpm; @@ -732,9 +730,7 @@ static int live_dirty_whitelist(void *arg) } out_file: - mutex_unlock(&i915->drm.struct_mutex); mock_file_free(i915, file); - mutex_lock(&i915->drm.struct_mutex); out_rpm: intel_runtime_pm_put(&i915->runtime_pm, wakeref); return err; @@ -1274,14 +1270,9 @@ int intel_workarounds_live_selftests(struct drm_i915_private *i915) SUBTEST(live_gpu_reset_workarounds), SUBTEST(live_engine_reset_workarounds), }; - int err; if (intel_gt_is_wedged(&i915->gt)) return 0; - mutex_lock(&i915->drm.struct_mutex); - err = i915_subtests(tests, i915); - mutex_unlock(&i915->drm.struct_mutex); - - return err; + return i915_subtests(tests, i915); } diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c index bba0eafe1cdb8..f927f851aadfe 100644 --- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c @@ -116,7 +116,6 @@ static int igt_guc_clients(void *args) int err = 0; GEM_BUG_ON(!HAS_GT_UC(dev_priv)); - mutex_lock(&dev_priv->drm.struct_mutex); wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); guc = &dev_priv->gt.uc.guc; @@ -190,7 +189,6 @@ static int igt_guc_clients(void *args) guc_clients_enable(guc); unlock: intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); - mutex_unlock(&dev_priv->drm.struct_mutex); return err; } @@ -208,7 +206,6 @@ static int igt_guc_doorbells(void *arg) u16 db_id; GEM_BUG_ON(!HAS_GT_UC(dev_priv)); - mutex_lock(&dev_priv->drm.struct_mutex); wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); guc = &dev_priv->gt.uc.guc; @@ -299,7 +296,6 @@ static int igt_guc_doorbells(void *arg) } unlock: intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); - mutex_unlock(&dev_priv->drm.struct_mutex); return err; } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 8d8121c021619..165b3a7f97443 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -1797,7 +1797,6 @@ static int igt_cs_tlb(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); ctx = live_context(i915, file); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); @@ -2020,7 +2019,6 @@ static int igt_cs_tlb(void *arg) out_vm: i915_vm_put(vm); out_unlock: - mutex_unlock(&i915->drm.struct_mutex); mock_file_free(i915, file); return err; } From 7842793330ce96ac1d20d22901fb4009c21676f2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 4 Oct 2019 14:40:15 +0100 Subject: [PATCH 325/331] drm/i915: Drop struct_mutex from around GEM initialisation We no longer need to placate lockdep by holding struct_mutex for our initialisation, so don't. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-21-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_pm.c | 2 -- drivers/gpu/drm/i915/i915_gem.c | 9 --------- drivers/gpu/drm/i915/selftests/mock_gem_device.c | 7 ------- 3 files changed, 18 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 7c316d4633db8..7987b54fb1f51 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -174,7 +174,6 @@ void i915_gem_resume(struct drm_i915_private *i915) { GEM_TRACE("\n"); - mutex_lock(&i915->drm.struct_mutex); intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); if (intel_gt_init_hw(&i915->gt)) @@ -198,7 +197,6 @@ void i915_gem_resume(struct drm_i915_private *i915) out_unlock: intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); - mutex_unlock(&i915->drm.struct_mutex); return; err_wedged: diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b6d279987b713..80f3153c48dcd 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1249,7 +1249,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv) * we hold the forcewake during initialisation these problems * just magically go away. */ - mutex_lock(&dev_priv->drm.struct_mutex); intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); ret = i915_init_ggtt(dev_priv); @@ -1319,7 +1318,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv) goto err_gt; intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); - mutex_unlock(&dev_priv->drm.struct_mutex); return 0; @@ -1330,15 +1328,11 @@ int i915_gem_init(struct drm_i915_private *dev_priv) * driver doesn't explode during runtime. */ err_gt: - mutex_unlock(&dev_priv->drm.struct_mutex); - intel_gt_set_wedged_on_init(&dev_priv->gt); i915_gem_suspend(dev_priv); i915_gem_suspend_late(dev_priv); i915_gem_drain_workqueue(dev_priv); - - mutex_lock(&dev_priv->drm.struct_mutex); err_init_hw: intel_uc_fini_hw(&dev_priv->gt.uc); err_uc_init: @@ -1353,7 +1347,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv) intel_gt_driver_release(&dev_priv->gt); err_unlock: intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); - mutex_unlock(&dev_priv->drm.struct_mutex); if (ret != -EIO) { intel_uc_cleanup_firmwares(&dev_priv->gt.uc); @@ -1406,10 +1399,8 @@ void i915_gem_driver_remove(struct drm_i915_private *dev_priv) /* Flush any outstanding unpin_work. */ i915_gem_drain_workqueue(dev_priv); - mutex_lock(&dev_priv->drm.struct_mutex); intel_uc_fini_hw(&dev_priv->gt.uc); intel_uc_fini(&dev_priv->gt.uc); - mutex_unlock(&dev_priv->drm.struct_mutex); i915_gem_drain_freed_objects(dev_priv); } diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 335f37ba98dec..70a7026db08dd 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -68,10 +68,7 @@ static void mock_device_release(struct drm_device *dev) drain_workqueue(i915->wq); i915_gem_drain_freed_objects(i915); - mutex_lock(&i915->drm.struct_mutex); mock_fini_ggtt(&i915->ggtt); - mutex_unlock(&i915->drm.struct_mutex); - destroy_workqueue(i915->wq); i915_gemfs_fini(i915); @@ -179,8 +176,6 @@ struct drm_i915_private *mock_gem_device(void) intel_timelines_init(i915); - mutex_lock(&i915->drm.struct_mutex); - mock_init_ggtt(i915, &i915->ggtt); mkwrite_device_info(i915)->engine_mask = BIT(0); @@ -197,7 +192,6 @@ struct drm_i915_private *mock_gem_device(void) goto err_context; intel_engines_driver_register(i915); - mutex_unlock(&i915->drm.struct_mutex); WARN_ON(i915_gemfs_init(i915)); @@ -208,7 +202,6 @@ struct drm_i915_private *mock_gem_device(void) err_engine: mock_engine_free(i915->engine[RCS0]); err_unlock: - mutex_unlock(&i915->drm.struct_mutex); intel_timelines_fini(i915); destroy_workqueue(i915->wq); err_drv: From 7d423af9bfb172c3802cc1ca6591ac608c26a311 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 3 Oct 2019 17:02:31 +0300 Subject: [PATCH 326/331] drm/i915: Implement a better i945gm vblank irq vs. C-states workaround MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current "disable C3+" workaround for the delayed vblank irqs on i945gm no longer works. I'm not sure what changed, but now I need to also disable C2. I also got my hands on a i915gm machine that suffers from the same issue. After some furious poking of registers I managed to find a better workaround: The "Do not Turn off Core Render Clock in C states" bit. With that I no longer have to disable any C-states, and as a nice bonus the power cost is only ~1/4 of the "disable C3+" method (which mind you doesn't even work anymore, and so would have an even higher power cost if we made it work by also disabling C2). So let's throw out all the cpuidle/qos crap and just toggle the magic bit as needed. And we extend the workaround to cover i915gm as well. Cc: Chris Wilson Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191003140231.24408-1-ville.syrjala@linux.intel.com Acked-by: Chris Wilson --- drivers/gpu/drm/i915/display/intel_display.c | 10 +-- drivers/gpu/drm/i915/i915_drv.h | 9 +-- drivers/gpu/drm/i915/i915_irq.c | 79 +++----------------- drivers/gpu/drm/i915/i915_irq.h | 4 +- drivers/gpu/drm/i915/i915_reg.h | 1 + 5 files changed, 22 insertions(+), 81 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index f3a94a9cff163..5acc39f32d0c9 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -15060,12 +15060,12 @@ static const struct drm_crtc_funcs i965_crtc_funcs = { .disable_vblank = i965_disable_vblank, }; -static const struct drm_crtc_funcs i945gm_crtc_funcs = { +static const struct drm_crtc_funcs i915gm_crtc_funcs = { INTEL_CRTC_FUNCS, .get_vblank_counter = i915_get_vblank_counter, - .enable_vblank = i945gm_enable_vblank, - .disable_vblank = i945gm_disable_vblank, + .enable_vblank = i915gm_enable_vblank, + .disable_vblank = i915gm_disable_vblank, }; static const struct drm_crtc_funcs i915_crtc_funcs = { @@ -15136,8 +15136,8 @@ static int intel_crtc_init(struct drm_i915_private *dev_priv, enum pipe pipe) funcs = &g4x_crtc_funcs; else if (IS_GEN(dev_priv, 4)) funcs = &i965_crtc_funcs; - else if (IS_I945GM(dev_priv)) - funcs = &i945gm_crtc_funcs; + else if (IS_I945GM(dev_priv) || IS_I915GM(dev_priv)) + funcs = &i915gm_crtc_funcs; else if (IS_GEN(dev_priv, 3)) funcs = &i915_crtc_funcs; else diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 35b610d523790..cde4c7fb5570c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1701,13 +1701,8 @@ struct drm_i915_private { } contexts; } gem; - /* For i945gm vblank irq vs. C3 workaround */ - struct { - struct work_struct work; - struct pm_qos_request pm_qos; - u8 c3_disable_latency; - u8 enabled; - } i945gm_vblank; + /* For i915gm/i945gm vblank irq workaround */ + u8 vblank_enabled; /* perform PHY state sanity checks? */ bool chv_phy_assert[2]; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index bc83f094065ae..f2371b6083c6d 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -29,7 +29,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include -#include #include #include @@ -2910,12 +2909,18 @@ int i8xx_enable_vblank(struct drm_crtc *crtc) return 0; } -int i945gm_enable_vblank(struct drm_crtc *crtc) +int i915gm_enable_vblank(struct drm_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->dev); - if (dev_priv->i945gm_vblank.enabled++ == 0) - schedule_work(&dev_priv->i945gm_vblank.work); + /* + * Vblank interrupts fail to wake the device up from C2+. + * Disabling render clock gating during C-states avoids + * the problem. There is a small power cost so we do this + * only when vblank interrupts are actually enabled. + */ + if (dev_priv->vblank_enabled++ == 0) + I915_WRITE(SCPD0, _MASKED_BIT_ENABLE(CSTATE_RENDER_CLOCK_GATE_DISABLE)); return i8xx_enable_vblank(crtc); } @@ -2988,14 +2993,14 @@ void i8xx_disable_vblank(struct drm_crtc *crtc) spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); } -void i945gm_disable_vblank(struct drm_crtc *crtc) +void i915gm_disable_vblank(struct drm_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->dev); i8xx_disable_vblank(crtc); - if (--dev_priv->i945gm_vblank.enabled == 0) - schedule_work(&dev_priv->i945gm_vblank.work); + if (--dev_priv->vblank_enabled == 0) + I915_WRITE(SCPD0, _MASKED_BIT_DISABLE(CSTATE_RENDER_CLOCK_GATE_DISABLE)); } void i965_disable_vblank(struct drm_crtc *crtc) @@ -3034,60 +3039,6 @@ void bdw_disable_vblank(struct drm_crtc *crtc) spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); } -static void i945gm_vblank_work_func(struct work_struct *work) -{ - struct drm_i915_private *dev_priv = - container_of(work, struct drm_i915_private, i945gm_vblank.work); - - /* - * Vblank interrupts fail to wake up the device from C3, - * hence we want to prevent C3 usage while vblank interrupts - * are enabled. - */ - pm_qos_update_request(&dev_priv->i945gm_vblank.pm_qos, - READ_ONCE(dev_priv->i945gm_vblank.enabled) ? - dev_priv->i945gm_vblank.c3_disable_latency : - PM_QOS_DEFAULT_VALUE); -} - -static int cstate_disable_latency(const char *name) -{ - const struct cpuidle_driver *drv; - int i; - - drv = cpuidle_get_driver(); - if (!drv) - return 0; - - for (i = 0; i < drv->state_count; i++) { - const struct cpuidle_state *state = &drv->states[i]; - - if (!strcmp(state->name, name)) - return state->exit_latency ? - state->exit_latency - 1 : 0; - } - - return 0; -} - -static void i945gm_vblank_work_init(struct drm_i915_private *dev_priv) -{ - INIT_WORK(&dev_priv->i945gm_vblank.work, - i945gm_vblank_work_func); - - dev_priv->i945gm_vblank.c3_disable_latency = - cstate_disable_latency("C3"); - pm_qos_add_request(&dev_priv->i945gm_vblank.pm_qos, - PM_QOS_CPU_DMA_LATENCY, - PM_QOS_DEFAULT_VALUE); -} - -static void i945gm_vblank_work_fini(struct drm_i915_private *dev_priv) -{ - cancel_work_sync(&dev_priv->i945gm_vblank.work); - pm_qos_remove_request(&dev_priv->i945gm_vblank.pm_qos); -} - static void ibx_irq_reset(struct drm_i915_private *dev_priv) { struct intel_uncore *uncore = &dev_priv->uncore; @@ -4298,9 +4249,6 @@ void intel_irq_init(struct drm_i915_private *dev_priv) struct intel_rps *rps = &dev_priv->gt_pm.rps; int i; - if (IS_I945GM(dev_priv)) - i945gm_vblank_work_init(dev_priv); - intel_hpd_init_work(dev_priv); INIT_WORK(&rps->work, gen6_pm_rps_work); @@ -4388,9 +4336,6 @@ void intel_irq_fini(struct drm_i915_private *i915) { int i; - if (IS_I945GM(i915)) - i945gm_vblank_work_fini(i915); - for (i = 0; i < MAX_L3_SLICES; ++i) kfree(i915->l3_parity.remap_info[i]); } diff --git a/drivers/gpu/drm/i915/i915_irq.h b/drivers/gpu/drm/i915/i915_irq.h index 8e7e6071777e3..19a3bc019535f 100644 --- a/drivers/gpu/drm/i915/i915_irq.h +++ b/drivers/gpu/drm/i915/i915_irq.h @@ -122,12 +122,12 @@ u32 i915_get_vblank_counter(struct drm_crtc *crtc); u32 g4x_get_vblank_counter(struct drm_crtc *crtc); int i8xx_enable_vblank(struct drm_crtc *crtc); -int i945gm_enable_vblank(struct drm_crtc *crtc); +int i915gm_enable_vblank(struct drm_crtc *crtc); int i965_enable_vblank(struct drm_crtc *crtc); int ilk_enable_vblank(struct drm_crtc *crtc); int bdw_enable_vblank(struct drm_crtc *crtc); void i8xx_disable_vblank(struct drm_crtc *crtc); -void i945gm_disable_vblank(struct drm_crtc *crtc); +void i915gm_disable_vblank(struct drm_crtc *crtc); void i965_disable_vblank(struct drm_crtc *crtc); void ilk_disable_vblank(struct drm_crtc *crtc); void bdw_disable_vblank(struct drm_crtc *crtc); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 813ddea3f9f1f..6d67bd238cfe2 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2709,6 +2709,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define VLV_GU_CTL0 _MMIO(VLV_DISPLAY_BASE + 0x2030) #define VLV_GU_CTL1 _MMIO(VLV_DISPLAY_BASE + 0x2034) #define SCPD0 _MMIO(0x209c) /* 915+ only */ +#define CSTATE_RENDER_CLOCK_GATE_DISABLE (1 << 5) #define GEN2_IER _MMIO(0x20a0) #define GEN2_IIR _MMIO(0x20a4) #define GEN2_IMR _MMIO(0x20a8) From 0e5493cab5ef90fb91f89a2dc84259c7a7d7fd50 Mon Sep 17 00:00:00 2001 From: CQ Tang Date: Fri, 4 Oct 2019 18:04:32 +0100 Subject: [PATCH 327/331] drm/i915/stolen: make the object creation interface consistent Our other backends return an actual error value upon failure. Do the same for stolen objects, which currently just return NULL on failure. Signed-off-by: CQ Tang Signed-off-by: Matthew Auld Cc: Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191004170452.15410-2-matthew.auld@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 2 +- drivers/gpu/drm/i915/display/intel_fbdev.c | 4 +- drivers/gpu/drm/i915/display/intel_overlay.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 45 +++++++++++--------- drivers/gpu/drm/i915/gt/intel_gt.c | 2 +- drivers/gpu/drm/i915/gt/intel_rc6.c | 8 ++-- drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 2 +- 7 files changed, 36 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 5acc39f32d0c9..c15975a410bc2 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -3066,7 +3066,7 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, base_aligned, base_aligned, size_aligned); - if (!obj) + if (IS_ERR(obj)) return false; switch (plane_config->tiling) { diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index 97cde017670a1..3d1061470e761 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -141,10 +141,10 @@ static int intelfb_alloc(struct drm_fb_helper *helper, /* If the FB is too big, just don't use it since fbdev is not very * important and we should probably use that space with FBC or other * features. */ - obj = NULL; + obj = ERR_PTR(-ENODEV); if (size * 2 < dev_priv->stolen_usable_size) obj = i915_gem_object_create_stolen(dev_priv, size); - if (obj == NULL) + if (IS_ERR(obj)) obj = i915_gem_object_create_shmem(dev_priv, size); if (IS_ERR(obj)) { DRM_ERROR("failed to allocate framebuffer\n"); diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index daea112cbb870..2360f19f96945 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -1291,7 +1291,7 @@ static int get_registers(struct intel_overlay *overlay, bool use_phys) int err; obj = i915_gem_object_create_stolen(i915, PAGE_SIZE); - if (obj == NULL) + if (IS_ERR(obj)) obj = i915_gem_object_create_internal(i915, PAGE_SIZE); if (IS_ERR(obj)) return PTR_ERR(obj); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index fad98a921cde8..c76260ce13e39 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -553,10 +553,11 @@ _i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, { struct drm_i915_gem_object *obj; unsigned int cache_level; + int err = -ENOMEM; obj = i915_gem_object_alloc(); - if (obj == NULL) - return NULL; + if (!obj) + goto err; drm_gem_private_object_init(&dev_priv->drm, &obj->base, stolen->size); i915_gem_object_init(obj, &i915_gem_object_stolen_ops); @@ -566,14 +567,16 @@ _i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, cache_level = HAS_LLC(dev_priv) ? I915_CACHE_LLC : I915_CACHE_NONE; i915_gem_object_set_cache_coherency(obj, cache_level); - if (i915_gem_object_pin_pages(obj)) + err = i915_gem_object_pin_pages(obj); + if (err) goto cleanup; return obj; cleanup: i915_gem_object_free(obj); - return NULL; +err: + return ERR_PTR(err); } struct drm_i915_gem_object * @@ -585,28 +588,32 @@ i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, int ret; if (!drm_mm_initialized(&dev_priv->mm.stolen)) - return NULL; + return ERR_PTR(-ENODEV); if (size == 0) - return NULL; + return ERR_PTR(-EINVAL); stolen = kzalloc(sizeof(*stolen), GFP_KERNEL); if (!stolen) - return NULL; + return ERR_PTR(-ENOMEM); ret = i915_gem_stolen_insert_node(dev_priv, stolen, size, 4096); if (ret) { - kfree(stolen); - return NULL; + obj = ERR_PTR(ret); + goto err_free; } obj = _i915_gem_object_create_stolen(dev_priv, stolen); - if (obj) - return obj; + if (IS_ERR(obj)) + goto err_remove; + return obj; + +err_remove: i915_gem_stolen_remove_node(dev_priv, stolen); +err_free: kfree(stolen); - return NULL; + return obj; } struct drm_i915_gem_object * @@ -622,7 +629,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv int ret; if (!drm_mm_initialized(&dev_priv->mm.stolen)) - return NULL; + return ERR_PTR(-ENODEV); DRM_DEBUG_DRIVER("creating preallocated stolen object: stolen_offset=%pa, gtt_offset=%pa, size=%pa\n", &stolen_offset, >t_offset, &size); @@ -631,11 +638,11 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv if (WARN_ON(size == 0) || WARN_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)) || WARN_ON(!IS_ALIGNED(stolen_offset, I915_GTT_MIN_ALIGNMENT))) - return NULL; + return ERR_PTR(-EINVAL); stolen = kzalloc(sizeof(*stolen), GFP_KERNEL); if (!stolen) - return NULL; + return ERR_PTR(-ENOMEM); stolen->start = stolen_offset; stolen->size = size; @@ -645,15 +652,15 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv if (ret) { DRM_DEBUG_DRIVER("failed to allocate stolen space\n"); kfree(stolen); - return NULL; + return ERR_PTR(ret); } obj = _i915_gem_object_create_stolen(dev_priv, stolen); - if (obj == NULL) { + if (IS_ERR(obj)) { DRM_DEBUG_DRIVER("failed to allocate stolen object\n"); i915_gem_stolen_remove_node(dev_priv, stolen); kfree(stolen); - return NULL; + return obj; } /* Some objects just need physical mem from stolen space */ @@ -706,5 +713,5 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv i915_gem_object_unpin_pages(obj); err: i915_gem_object_put(obj); - return NULL; + return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 53220741e49e2..8f44cf8c79b2a 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -334,7 +334,7 @@ static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size) int ret; obj = i915_gem_object_create_stolen(i915, size); - if (!obj) + if (IS_ERR(obj)) obj = i915_gem_object_create_internal(i915, size); if (IS_ERR(obj)) { DRM_ERROR("Failed to allocate scratch page\n"); diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index d6167dd592e98..71184aa728961 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -299,8 +299,8 @@ static int vlv_rc6_init(struct intel_rc6 *rc6) pcbr_offset, I915_GTT_OFFSET_NONE, pctx_size); - if (!pctx) - return -ENOMEM; + if (IS_ERR(pctx)) + return PTR_ERR(pctx); goto out; } @@ -316,9 +316,9 @@ static int vlv_rc6_init(struct intel_rc6 *rc6) * memory, or any other relevant ranges. */ pctx = i915_gem_object_create_stolen(i915, pctx_size); - if (!pctx) { + if (IS_ERR(pctx)) { DRM_DEBUG("not enough stolen space for PCTX, disabling\n"); - return -ENOMEM; + return PTR_ERR(pctx); } GEM_BUG_ON(range_overflows_t(u64, diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index ec32996254c09..311fdc0a21bc2 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -1274,7 +1274,7 @@ static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size) struct i915_vma *vma; obj = i915_gem_object_create_stolen(i915, size); - if (!obj) + if (IS_ERR(obj)) obj = i915_gem_object_create_internal(i915, size); if (IS_ERR(obj)) return ERR_CAST(obj); From 7fd296024c3cd06773a1df712b2c50932aeea60b Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 4 Oct 2019 15:20:17 +0300 Subject: [PATCH 328/331] drm/i915/vga: rename intel_vga_msr_write() to intel_vga_reset_io_mem() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename the function per Ville's suggestion. No functional changes. Cc: Ville Syrjälä Suggested-by: Ville Syrjälä Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20191004122019.12009-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/display/intel_display_power.c | 2 +- drivers/gpu/drm/i915/display/intel_vga.c | 2 +- drivers/gpu/drm/i915/display/intel_vga.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index bb642a1a0dd47..0616284c6da67 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -267,7 +267,7 @@ static void hsw_power_well_post_enable(struct drm_i915_private *dev_priv, u8 irq_pipe_mask, bool has_vga) { if (has_vga) - intel_vga_msr_write(dev_priv); + intel_vga_reset_io_mem(dev_priv); if (irq_pipe_mask) gen8_irq_power_well_post_enable(dev_priv, irq_pipe_mask); diff --git a/drivers/gpu/drm/i915/display/intel_vga.c b/drivers/gpu/drm/i915/display/intel_vga.c index 732568eaa988c..2ff7293986d40 100644 --- a/drivers/gpu/drm/i915/display/intel_vga.c +++ b/drivers/gpu/drm/i915/display/intel_vga.c @@ -72,7 +72,7 @@ void intel_vga_redisable(struct drm_i915_private *i915) intel_display_power_put(i915, POWER_DOMAIN_VGA, wakeref); } -void intel_vga_msr_write(struct drm_i915_private *i915) +void intel_vga_reset_io_mem(struct drm_i915_private *i915) { struct pci_dev *pdev = i915->drm.pdev; diff --git a/drivers/gpu/drm/i915/display/intel_vga.h b/drivers/gpu/drm/i915/display/intel_vga.h index 3517872e62ac4..ba5b55b917f08 100644 --- a/drivers/gpu/drm/i915/display/intel_vga.h +++ b/drivers/gpu/drm/i915/display/intel_vga.h @@ -8,7 +8,7 @@ struct drm_i915_private; -void intel_vga_msr_write(struct drm_i915_private *i915); +void intel_vga_reset_io_mem(struct drm_i915_private *i915); void intel_vga_disable(struct drm_i915_private *i915); void intel_vga_redisable(struct drm_i915_private *i915); void intel_vga_redisable_power_on(struct drm_i915_private *i915); From 63bf8301aac492d4c5fdf7b0ec42b142c05080e3 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 4 Oct 2019 15:20:18 +0300 Subject: [PATCH 329/331] drm/i915: split out i915_switcheroo.[ch] from i915_drv.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split out code related to vga switcheroo register/unregister and state handling from i915_drv.c into new i915_switcheroo.[ch] files. It's a bit difficult to draw the line how much to move to the new file from i915_drv.c, but it seemed to me keeping i915_suspend_switcheroo() and i915_resume_switcheroo() in place was the cleanest. No functional changes. Cc: Ville Syrjälä Cc: Chris Wilson Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20191004122019.12009-2-jani.nikula@intel.com --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_drv.c | 63 +++--------------------- drivers/gpu/drm/i915/i915_drv.h | 3 ++ drivers/gpu/drm/i915/i915_switcheroo.c | 67 ++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_switcheroo.h | 14 ++++++ 5 files changed, 92 insertions(+), 56 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_switcheroo.c create mode 100644 drivers/gpu/drm/i915/i915_switcheroo.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 4e23f1a5c39f6..45d3be12359d6 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -46,6 +46,7 @@ i915-y += i915_drv.o \ i915_pci.o \ i915_scatterlist.o \ i915_suspend.o \ + i915_switcheroo.o \ i915_sysfs.o \ i915_utils.o \ intel_csr.o \ diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 5183a40d3860b..902a54bb46e0a 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -72,6 +72,7 @@ #include "i915_perf.h" #include "i915_query.h" #include "i915_suspend.h" +#include "i915_switcheroo.h" #include "i915_sysfs.h" #include "i915_trace.h" #include "i915_vgpu.h" @@ -269,56 +270,8 @@ intel_teardown_mchbar(struct drm_i915_private *dev_priv) release_resource(&dev_priv->mch_res); } -static int i915_resume_switcheroo(struct drm_i915_private *i915); -static int i915_suspend_switcheroo(struct drm_i915_private *i915, - pm_message_t state); - -static void i915_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state) -{ - struct drm_i915_private *i915 = pdev_to_i915(pdev); - pm_message_t pmm = { .event = PM_EVENT_SUSPEND }; - - if (!i915) { - dev_err(&pdev->dev, "DRM not initialized, aborting switch.\n"); - return; - } - - if (state == VGA_SWITCHEROO_ON) { - pr_info("switched on\n"); - i915->drm.switch_power_state = DRM_SWITCH_POWER_CHANGING; - /* i915 resume handler doesn't set to D0 */ - pci_set_power_state(pdev, PCI_D0); - i915_resume_switcheroo(i915); - i915->drm.switch_power_state = DRM_SWITCH_POWER_ON; - } else { - pr_info("switched off\n"); - i915->drm.switch_power_state = DRM_SWITCH_POWER_CHANGING; - i915_suspend_switcheroo(i915, pmm); - i915->drm.switch_power_state = DRM_SWITCH_POWER_OFF; - } -} - -static bool i915_switcheroo_can_switch(struct pci_dev *pdev) -{ - struct drm_i915_private *i915 = pdev_to_i915(pdev); - - /* - * FIXME: open_count is protected by drm_global_mutex but that would lead to - * locking inversion with the driver load path. And the access here is - * completely racy anyway. So don't bother with locking for now. - */ - return i915 && i915->drm.open_count == 0; -} - -static const struct vga_switcheroo_client_ops i915_switcheroo_ops = { - .set_gpu_state = i915_switcheroo_set_state, - .reprobe = NULL, - .can_switch = i915_switcheroo_can_switch, -}; - static int i915_driver_modeset_probe(struct drm_i915_private *i915) { - struct pci_dev *pdev = i915->drm.pdev; int ret; if (i915_inject_probe_failure(i915)) @@ -339,7 +292,7 @@ static int i915_driver_modeset_probe(struct drm_i915_private *i915) intel_register_dsm_handler(); - ret = vga_switcheroo_register_client(pdev, &i915_switcheroo_ops, false); + ret = i915_switcheroo_register(i915); if (ret) goto cleanup_vga_client; @@ -394,7 +347,7 @@ static int i915_driver_modeset_probe(struct drm_i915_private *i915) cleanup_csr: intel_csr_ucode_fini(i915); intel_power_domains_driver_remove(i915); - vga_switcheroo_unregister_client(pdev); + i915_switcheroo_unregister(i915); cleanup_vga_client: intel_vga_unregister(i915); out: @@ -428,13 +381,12 @@ static int i915_kick_out_firmware_fb(struct drm_i915_private *dev_priv) static void i915_driver_modeset_remove(struct drm_i915_private *i915) { - struct pci_dev *pdev = i915->drm.pdev; - intel_modeset_driver_remove(i915); intel_bios_driver_remove(i915); - vga_switcheroo_unregister_client(pdev); + i915_switcheroo_unregister(i915); + intel_vga_unregister(i915); intel_csr_ucode_fini(i915); @@ -1860,8 +1812,7 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation) return ret; } -static int -i915_suspend_switcheroo(struct drm_i915_private *i915, pm_message_t state) +int i915_suspend_switcheroo(struct drm_i915_private *i915, pm_message_t state) { int error; @@ -2027,7 +1978,7 @@ static int i915_drm_resume_early(struct drm_device *dev) return ret; } -static int i915_resume_switcheroo(struct drm_i915_private *i915) +int i915_resume_switcheroo(struct drm_i915_private *i915) { int ret; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index cde4c7fb5570c..b367467b03c25 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2189,6 +2189,9 @@ extern const struct dev_pm_ops i915_pm_ops; int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent); void i915_driver_remove(struct drm_i915_private *i915); +int i915_resume_switcheroo(struct drm_i915_private *i915); +int i915_suspend_switcheroo(struct drm_i915_private *i915, pm_message_t state); + void intel_engine_init_hangcheck(struct intel_engine_cs *engine); int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on); diff --git a/drivers/gpu/drm/i915/i915_switcheroo.c b/drivers/gpu/drm/i915/i915_switcheroo.c new file mode 100644 index 0000000000000..39c79e1c5b521 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_switcheroo.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include + +#include "i915_drv.h" +#include "i915_switcheroo.h" + +static void i915_switcheroo_set_state(struct pci_dev *pdev, + enum vga_switcheroo_state state) +{ + struct drm_i915_private *i915 = pdev_to_i915(pdev); + pm_message_t pmm = { .event = PM_EVENT_SUSPEND }; + + if (!i915) { + dev_err(&pdev->dev, "DRM not initialized, aborting switch.\n"); + return; + } + + if (state == VGA_SWITCHEROO_ON) { + pr_info("switched on\n"); + i915->drm.switch_power_state = DRM_SWITCH_POWER_CHANGING; + /* i915 resume handler doesn't set to D0 */ + pci_set_power_state(pdev, PCI_D0); + i915_resume_switcheroo(i915); + i915->drm.switch_power_state = DRM_SWITCH_POWER_ON; + } else { + pr_info("switched off\n"); + i915->drm.switch_power_state = DRM_SWITCH_POWER_CHANGING; + i915_suspend_switcheroo(i915, pmm); + i915->drm.switch_power_state = DRM_SWITCH_POWER_OFF; + } +} + +static bool i915_switcheroo_can_switch(struct pci_dev *pdev) +{ + struct drm_i915_private *i915 = pdev_to_i915(pdev); + + /* + * FIXME: open_count is protected by drm_global_mutex but that would lead to + * locking inversion with the driver load path. And the access here is + * completely racy anyway. So don't bother with locking for now. + */ + return i915 && i915->drm.open_count == 0; +} + +static const struct vga_switcheroo_client_ops i915_switcheroo_ops = { + .set_gpu_state = i915_switcheroo_set_state, + .reprobe = NULL, + .can_switch = i915_switcheroo_can_switch, +}; + +int i915_switcheroo_register(struct drm_i915_private *i915) +{ + struct pci_dev *pdev = i915->drm.pdev; + + return vga_switcheroo_register_client(pdev, &i915_switcheroo_ops, false); +} + +void i915_switcheroo_unregister(struct drm_i915_private *i915) +{ + struct pci_dev *pdev = i915->drm.pdev; + + vga_switcheroo_unregister_client(pdev); +} diff --git a/drivers/gpu/drm/i915/i915_switcheroo.h b/drivers/gpu/drm/i915/i915_switcheroo.h new file mode 100644 index 0000000000000..59b6c1e07d759 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_switcheroo.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __I915_SWITCHEROO__ +#define __I915_SWITCHEROO__ + +struct drm_i915_private; + +int i915_switcheroo_register(struct drm_i915_private *i915); +void i915_switcheroo_unregister(struct drm_i915_private *i915); + +#endif /* __I915_SWITCHEROO__ */ From 9bfcf1941302c471e8c8138b6b01f7b84d4f5ba4 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 4 Oct 2019 15:20:19 +0300 Subject: [PATCH 330/331] drm/i915: move gmbus setup down to intel_modeset_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pair the gmbus setup and teardown in the same layer. This also fixes the double gmbus teardown on the i915_driver_modeset_probe() error path. Move the gmbus setup a bit later in the sequence to make the follow-up refactoring easier, and to pinpoint any unexpected consequences of this change right here, instead of the later refactoring. Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20191004122019.12009-3-jani.nikula@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 2 ++ drivers/gpu/drm/i915/i915_drv.c | 4 ---- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index c15975a410bc2..05fb672a00b92 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -16216,6 +16216,8 @@ int intel_modeset_init(struct drm_i915_private *i915) intel_panel_sanitize_ssc(i915); + intel_gmbus_setup(i915); + DRM_DEBUG_KMS("%d display pipe%s available.\n", INTEL_NUM_PIPES(i915), INTEL_NUM_PIPES(i915) > 1 ? "s" : ""); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 902a54bb46e0a..15abad5c2d62a 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -53,7 +53,6 @@ #include "display/intel_display_types.h" #include "display/intel_dp.h" #include "display/intel_fbdev.h" -#include "display/intel_gmbus.h" #include "display/intel_hotplug.h" #include "display/intel_overlay.h" #include "display/intel_pipe_crc.h" @@ -307,8 +306,6 @@ static int i915_driver_modeset_probe(struct drm_i915_private *i915) if (ret) goto cleanup_csr; - intel_gmbus_setup(i915); - /* Important: The output setup functions called by modeset_init need * working irqs for e.g. gmbus and dp aux transfers. */ ret = intel_modeset_init(i915); @@ -343,7 +340,6 @@ static int i915_driver_modeset_probe(struct drm_i915_private *i915) intel_modeset_driver_remove(i915); cleanup_irq: intel_irq_uninstall(i915); - intel_gmbus_teardown(i915); cleanup_csr: intel_csr_ucode_fini(i915); intel_power_domains_driver_remove(i915); From 9445ad17109b6fe7864acc33f0c62bd9d866b722 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Mon, 7 Oct 2019 15:24:47 +0300 Subject: [PATCH 331/331] drm/i915: Update DRIVER_DATE to 20191007 Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b367467b03c25..1da67b2421139 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -106,8 +106,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20190927" -#define DRIVER_TIMESTAMP 1569587154 +#define DRIVER_DATE "20191007" +#define DRIVER_TIMESTAMP 1570451087 struct drm_i915_gem_object;