Merge tag 'drm-intel-fixes-2018-11-15' of git://anongit.freedesktop.org/drm/drm-intel into drm-fixes

- Fix Bugzilla #108712: incorrect EU count reported by the kernel
- Account for the scale factor when calculating the initial phase on scaled output
- Avoid overly trigger-happy HPD storm detection; fix a race and an oops on MST systems
- Relocation race fix for Gen4/5
- A couple of ICL fixes and dependencies for the above Fixes: patches

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181115164709.GA13430@jlahtine-desk.ger.corp.intel.com
Dave Airlie committed Nov 15, 2018
2 parents 9826b11 + 6e8adf6 commit 20325e8
Showing 9 changed files with 208 additions and 80 deletions.
2 changes: 1 addition & 1 deletion drivers/gpu/drm/i915/intel_device_info.c
@@ -474,7 +474,7 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
 			u8 eu_disabled_mask;
 			u32 n_disabled;

-			if (!(sseu->subslice_mask[ss] & BIT(ss)))
+			if (!(sseu->subslice_mask[s] & BIT(ss)))
 				/* skip disabled subslice */
 				continue;

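The one-character fix above is the EU-count bug from the Bugzilla report: the per-slice subslice mask was indexed with the subslice counter `ss` instead of the slice counter `s`. A standalone sketch of the counting loop (simplified types, not the driver's actual struct sseu_dev_info) shows why the wrong index both reads the wrong slice's mask and can run past the array:

#include <stdint.h>
#include <stdio.h>

#define MAX_SLICES    3
#define MAX_SUBSLICES 8	/* more subslices than slices: ss would overrun subslice_mask[] */

int main(void)
{
	/* one mask per slice, one bit per subslice within that slice */
	uint8_t subslice_mask[MAX_SLICES] = { 0xff, 0x0f, 0x03 };
	int enabled = 0;

	for (int s = 0; s < MAX_SLICES; s++)
		for (int ss = 0; ss < MAX_SUBSLICES; ss++)
			/* buggy form: subslice_mask[ss] & BIT(ss) -- indexes the
			 * array by subslice, reading other slices' masks (or
			 * garbage once ss >= MAX_SLICES); the fix indexes by s */
			if (subslice_mask[s] & (1u << ss))
				enabled++;

	printf("%d subslices enabled\n", enabled);	/* 8 + 4 + 2 = 14 */
	return 0;
}
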
45 changes: 42 additions & 3 deletions drivers/gpu/drm/i915/intel_display.c
@@ -4850,15 +4850,47 @@ static void cpt_verify_modeset(struct drm_device *dev, int pipe)
  * chroma samples for both of the luma samples, and thus we don't
  * actually get the expected MPEG2 chroma siting convention :(
  * The same behaviour is observed on pre-SKL platforms as well.
+ *
+ * Theory behind the formula (note that we ignore sub-pixel
+ * source coordinates):
+ * s = source sample position
+ * d = destination sample position
+ *
+ * Downscaling 4:1:
+ * -0.5
+ * | 0.0
+ * | |     1.5 (initial phase)
+ * | |     |
+ * v v     v
+ * | s | s | s | s |
+ * |       d       |
+ *
+ * Upscaling 1:4:
+ * -0.5
+ * | -0.375 (initial phase)
+ * | |     0.0
+ * | |     |
+ * v v     v
+ * |       s       |
+ * | d | d | d | d |
  */
-u16 skl_scaler_calc_phase(int sub, bool chroma_cosited)
+u16 skl_scaler_calc_phase(int sub, int scale, bool chroma_cosited)
 {
 	int phase = -0x8000;
 	u16 trip = 0;

 	if (chroma_cosited)
 		phase += (sub - 1) * 0x8000 / sub;

+	phase += scale / (2 * sub);
+
+	/*
+	 * Hardware initial phase limited to [-0.5:1.5].
+	 * Since the max hardware scale factor is 3.0, we
+	 * should never actually exceed 1.0 here.
+	 */
+	WARN_ON(phase < -0x8000 || phase > 0x18000);
+
 	if (phase < 0)
 		phase = 0x10000 + phase;
 	else
@@ -5067,13 +5099,20 @@ static void skylake_pfit_enable(struct intel_crtc *crtc)

 	if (crtc->config->pch_pfit.enabled) {
 		u16 uv_rgb_hphase, uv_rgb_vphase;
+		int pfit_w, pfit_h, hscale, vscale;
 		int id;

 		if (WARN_ON(crtc->config->scaler_state.scaler_id < 0))
 			return;

-		uv_rgb_hphase = skl_scaler_calc_phase(1, false);
-		uv_rgb_vphase = skl_scaler_calc_phase(1, false);
+		pfit_w = (crtc->config->pch_pfit.size >> 16) & 0xFFFF;
+		pfit_h = crtc->config->pch_pfit.size & 0xFFFF;
+
+		hscale = (crtc->config->pipe_src_w << 16) / pfit_w;
+		vscale = (crtc->config->pipe_src_h << 16) / pfit_h;
+
+		uv_rgb_hphase = skl_scaler_calc_phase(1, hscale, false);
+		uv_rgb_vphase = skl_scaler_calc_phase(1, vscale, false);

 		id = scaler_state->scaler_id;
 		I915_WRITE(SKL_PS_CTRL(pipe, id), PS_SCALER_EN |
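
To make the new `scale` argument concrete: phase and scale are both .16 fixed point, the initial phase starts at -0.5 (-0x8000) for the top/left edge, and the new `scale / (2 * sub)` term advances it to the center of the first destination sample; the `hscale`/`vscale` values above are simply source/destination size ratios in the same .16 format. A standalone re-implementation checking the two cases from the diagram (the PS_PHASE_* encodings here are assumptions for the sketch, and the WARN_ON is dropped):

#include <assert.h>

#define PS_PHASE_MASK (0x7fff << 1)	/* assumed register layout */
#define PS_PHASE_TRIP (1 << 0)		/* assumed register layout */

static unsigned int calc_phase(int sub, int scale, int chroma_cosited)
{
	int phase = -0x8000;			/* -0.5: top/left edge */
	unsigned int trip = 0;

	if (chroma_cosited)
		phase += (sub - 1) * 0x8000 / sub;

	phase += scale / (2 * sub);		/* center of first dst sample */

	if (phase < 0)
		phase = 0x10000 + phase;	/* wrap into the HW's encoding */
	else
		trip = PS_PHASE_TRIP;

	return ((phase >> 2) & PS_PHASE_MASK) | trip;
}

int main(void)
{
	/* downscaling 4:1 (scale = 4.0): -0.5 + 2.0 = 1.5, as in the diagram */
	assert(calc_phase(1, 4 << 16, 0) == ((0x6000 & PS_PHASE_MASK) | PS_PHASE_TRIP));
	/* upscaling 1:4 (scale = 0.25): -0.5 + 0.125 = -0.375, as in the diagram */
	assert(calc_phase(1, 1 << 14, 0) == (0x2800 & PS_PHASE_MASK));
	return 0;
}
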
8 changes: 4 additions & 4 deletions drivers/gpu/drm/i915/intel_dp_mst.c
@@ -452,6 +452,10 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo
 	if (!intel_connector)
 		return NULL;

+	intel_connector->get_hw_state = intel_dp_mst_get_hw_state;
+	intel_connector->mst_port = intel_dp;
+	intel_connector->port = port;
+
 	connector = &intel_connector->base;
 	ret = drm_connector_init(dev, connector, &intel_dp_mst_connector_funcs,
 				 DRM_MODE_CONNECTOR_DisplayPort);
@@ -462,10 +466,6 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo

 	drm_connector_helper_add(connector, &intel_dp_mst_connector_helper_funcs);

-	intel_connector->get_hw_state = intel_dp_mst_get_hw_state;
-	intel_connector->mst_port = intel_dp;
-	intel_connector->port = port;
-
 	for_each_pipe(dev_priv, pipe) {
 		struct drm_encoder *enc =
 			&intel_dp->mst_encoders[pipe]->base.base;
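
The change here is purely a reordering: drm_connector_init() adds the connector to the device's connector list, at which point other threads (such as the HPD storm re-enable worker patched below) can already iterate over it, so mst_port and friends must be valid beforehand. A minimal sketch of the initialize-then-publish rule (simplified types, not the DRM API):

struct conn {
	void *mst_port;		/* concurrent readers test this, as in the hotplug fix */
	struct conn *next;
};

/* Fill in every field a concurrent reader may inspect *before* linking the
 * object into a shared list; publishing first leaves a window where readers
 * see a half-initialized connector (the reported oops). Real code would also
 * need a write barrier or lock between the two steps. */
static void publish(struct conn **head, struct conn *c, void *mst_port)
{
	c->mst_port = mst_port;	/* 1: initialize */
	c->next = *head;	/* 2: only now make it reachable */
	*head = c;
}
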
2 changes: 1 addition & 1 deletion drivers/gpu/drm/i915/intel_drv.h
@@ -1646,7 +1646,7 @@ void intel_mode_from_pipe_config(struct drm_display_mode *mode,
 void intel_crtc_arm_fifo_underrun(struct intel_crtc *crtc,
 				  struct intel_crtc_state *crtc_state);

-u16 skl_scaler_calc_phase(int sub, bool chroma_center);
+u16 skl_scaler_calc_phase(int sub, int scale, bool chroma_center);
 int skl_update_scaler_crtc(struct intel_crtc_state *crtc_state);
 int skl_max_scale(const struct intel_crtc_state *crtc_state,
 		  u32 pixel_format);
70 changes: 50 additions & 20 deletions drivers/gpu/drm/i915/intel_hotplug.c
@@ -228,7 +228,9 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work)
 	drm_for_each_connector_iter(connector, &conn_iter) {
 		struct intel_connector *intel_connector = to_intel_connector(connector);

-		if (intel_connector->encoder->hpd_pin == pin) {
+		/* Don't check MST ports, they don't have pins */
+		if (!intel_connector->mst_port &&
+		    intel_connector->encoder->hpd_pin == pin) {
 			if (connector->polled != intel_connector->polled)
 				DRM_DEBUG_DRIVER("Reenabling HPD on connector %s\n",
 						 connector->name);
@@ -395,37 +397,54 @@ void intel_hpd_irq_handler(struct drm_i915_private *dev_priv,
 	struct intel_encoder *encoder;
 	bool storm_detected = false;
 	bool queue_dig = false, queue_hp = false;
+	u32 long_hpd_pulse_mask = 0;
+	u32 short_hpd_pulse_mask = 0;
+	enum hpd_pin pin;

 	if (!pin_mask)
 		return;

 	spin_lock(&dev_priv->irq_lock);
+
+	/*
+	 * Determine whether ->hpd_pulse() exists for each pin, and
+	 * whether we have a short or a long pulse. This is needed
+	 * as each pin may have up to two encoders (HDMI and DP) and
+	 * only one of them (DP) will have ->hpd_pulse().
+	 */
 	for_each_intel_encoder(&dev_priv->drm, encoder) {
-		enum hpd_pin pin = encoder->hpd_pin;
 		bool has_hpd_pulse = intel_encoder_has_hpd_pulse(encoder);
+		enum port port = encoder->port;
+		bool long_hpd;

+		pin = encoder->hpd_pin;
 		if (!(BIT(pin) & pin_mask))
 			continue;

-		if (has_hpd_pulse) {
-			bool long_hpd = long_mask & BIT(pin);
-			enum port port = encoder->port;
+		if (!has_hpd_pulse)
+			continue;

-			DRM_DEBUG_DRIVER("digital hpd port %c - %s\n", port_name(port),
-					 long_hpd ? "long" : "short");
-			/*
-			 * For long HPD pulses we want to have the digital queue happen,
-			 * but we still want HPD storm detection to function.
-			 */
-			queue_dig = true;
-			if (long_hpd) {
-				dev_priv->hotplug.long_port_mask |= (1 << port);
-			} else {
-				/* for short HPD just trigger the digital queue */
-				dev_priv->hotplug.short_port_mask |= (1 << port);
-				continue;
-			}
+		long_hpd = long_mask & BIT(pin);
+
+		DRM_DEBUG_DRIVER("digital hpd port %c - %s\n", port_name(port),
+				 long_hpd ? "long" : "short");
+		queue_dig = true;
+
+		if (long_hpd) {
+			long_hpd_pulse_mask |= BIT(pin);
+			dev_priv->hotplug.long_port_mask |= BIT(port);
+		} else {
+			short_hpd_pulse_mask |= BIT(pin);
+			dev_priv->hotplug.short_port_mask |= BIT(port);
 		}
+	}
+
+	/* Now process each pin just once */
+	for_each_hpd_pin(pin) {
+		bool long_hpd;
+
+		if (!(BIT(pin) & pin_mask))
+			continue;

 		if (dev_priv->hotplug.stats[pin].state == HPD_DISABLED) {
 			/*
@@ -442,11 +461,22 @@ void intel_hpd_irq_handler(struct drm_i915_private *dev_priv,
 		if (dev_priv->hotplug.stats[pin].state != HPD_ENABLED)
 			continue;

-		if (!has_hpd_pulse) {
+		/*
+		 * Delegate to ->hpd_pulse() if one of the encoders for this
+		 * pin has it, otherwise let the hotplug_work deal with this
+		 * pin directly.
+		 */
+		if ((short_hpd_pulse_mask | long_hpd_pulse_mask) & BIT(pin)) {
+			long_hpd = long_hpd_pulse_mask & BIT(pin);
+		} else {
 			dev_priv->hotplug.event_bits |= BIT(pin);
+			long_hpd = true;
 			queue_hp = true;
 		}

+		if (!long_hpd)
+			continue;
+
 		if (intel_hpd_irq_storm_detect(dev_priv, pin)) {
 			dev_priv->hotplug.event_bits &= ~BIT(pin);
 			storm_detected = true;
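
The rework above is a two-pass scheme: one HPD pin can serve two encoders (HDMI and DP on the same port), so iterating encoders and acting per encoder could process a pin twice and feed storm detection spurious events. Pass one folds per-encoder information into per-pin masks; pass two acts on each pin exactly once. A compact sketch of the same shape (illustrative tables, not the driver's):

#include <stdint.h>

struct encoder { unsigned int pin; int has_hpd_pulse; };

/* one port exposing both an HDMI and a DP encoder on the same pin */
static const struct encoder encoders[] = {
	{ .pin = 1, .has_hpd_pulse = 1 },	/* DP */
	{ .pin = 1, .has_hpd_pulse = 0 },	/* HDMI, shares pin 1 */
};

static unsigned int handled;

static void hpd_irq(uint32_t pin_mask)
{
	uint32_t pulse_mask = 0;
	unsigned int i;

	/* pass 1: fold per-encoder properties into a per-pin mask */
	for (i = 0; i < sizeof(encoders) / sizeof(encoders[0]); i++)
		if (encoders[i].has_hpd_pulse)
			pulse_mask |= 1u << encoders[i].pin;

	/* pass 2: each pin handled once, however many encoders share it */
	for (i = 0; i < 32; i++)
		if ((pin_mask & (1u << i)) && (pulse_mask & (1u << i)))
			handled++;	/* storm accounting would go here */
}

int main(void)
{
	hpd_irq(1u << 1);		/* pin 1 pulses once... */
	return handled == 1 ? 0 : 1;	/* ...and is handled once */
}
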
14 changes: 13 additions & 1 deletion drivers/gpu/drm/i915/intel_lrc.c
@@ -424,14 +424,26 @@ static u64 execlists_update_context(struct i915_request *rq)

 	reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail);

-	/* True 32b PPGTT with dynamic page allocation: update PDP
+	/*
+	 * True 32b PPGTT with dynamic page allocation: update PDP
 	 * registers and point the unallocated PDPs to scratch page.
 	 * PML4 is allocated during ppgtt init, so this is not needed
 	 * in 48-bit mode.
 	 */
 	if (ppgtt && !i915_vm_is_48bit(&ppgtt->vm))
 		execlists_update_context_pdps(ppgtt, reg_state);

+	/*
+	 * Make sure the context image is complete before we submit it to HW.
+	 *
+	 * Ostensibly, writes (including the WCB) should be flushed prior to
+	 * an uncached write such as our mmio register access, but the
+	 * empirical evidence (esp. on Braswell) suggests that the WC write
+	 * into memory may not be visible to the HW prior to the completion
+	 * of the UC register write, and that we may begin execution from the
+	 * context before its image is complete, leading to invalid PD chasing.
+	 */
+	wmb();
+
 	return ce->lrc_desc;
 }

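The pattern enforced by the new wmb() is the classic one for write-combined mappings: the CPU fills the context image through a WC mapping, and the later uncached mmio write that kicks the hardware must not be able to overtake those WC stores. A sketch of the shape (kernel-style, not the driver's actual submission path; fill_reg() and the doorbell are placeholders, and the real kick happens later in the ELSP write):

static void submit_context(u32 *wc_image, unsigned int count,
			   void __iomem *doorbell)
{
	unsigned int i;

	/* WC stores may sit in the write-combining buffer... */
	for (i = 0; i < count; i++)
		wc_image[i] = fill_reg(i);	/* placeholder */

	/* ...so drain the WCB before the HW can be told to read the image */
	wmb();

	/* UC write; without the barrier the HW may see a stale image */
	writel(1, doorbell);
}
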
38 changes: 36 additions & 2 deletions drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -91,6 +91,7 @@ static int
 gen4_render_ring_flush(struct i915_request *rq, u32 mode)
 {
 	u32 cmd, *cs;
+	int i;

 	/*
 	 * read/write caches:
@@ -127,12 +128,45 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode)
 			cmd |= MI_INVALIDATE_ISP;
 	}

-	cs = intel_ring_begin(rq, 2);
+	i = 2;
+	if (mode & EMIT_INVALIDATE)
+		i += 20;
+
+	cs = intel_ring_begin(rq, i);
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);

 	*cs++ = cmd;
-	*cs++ = MI_NOOP;

+	/*
+	 * A random delay to let the CS invalidate take effect? Without this
+	 * delay, the GPU relocation path fails as the CS does not see
+	 * the updated contents. Just as important, if we apply the flushes
+	 * to the EMIT_FLUSH branch (i.e. immediately after the relocation
+	 * write and before the invalidate on the next batch), the relocations
+	 * still fail. This implies that it is a delay following invalidation
+	 * that is required to reset the caches, as opposed to a delay to
+	 * ensure the memory is written.
+	 */
+	if (mode & EMIT_INVALIDATE) {
+		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
+		*cs++ = i915_ggtt_offset(rq->engine->scratch) |
+			PIPE_CONTROL_GLOBAL_GTT;
+		*cs++ = 0;
+		*cs++ = 0;
+
+		for (i = 0; i < 12; i++)
+			*cs++ = MI_FLUSH;
+
+		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
+		*cs++ = i915_ggtt_offset(rq->engine->scratch) |
+			PIPE_CONTROL_GLOBAL_GTT;
+		*cs++ = 0;
+		*cs++ = 0;
+	}
+
+	*cs++ = MI_NOOP;
+
 	intel_ring_advance(rq, cs);

 	return 0;
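
One non-obvious detail above: intel_ring_begin() must be asked for exactly the number of dwords emitted, which is where "i += 20" comes from. Restated as compile-time arithmetic (macro names invented for this sketch, not from the driver):

#define SKETCH_PIPE_CONTROL_DWORDS 4	/* GFX_OP_PIPE_CONTROL(4) + 3 operands */
#define SKETCH_DELAY_FLUSHES       12	/* the MI_FLUSH loop */
#define SKETCH_INVALIDATE_DWORDS   (SKETCH_PIPE_CONTROL_DWORDS + \
				    SKETCH_DELAY_FLUSHES + \
				    SKETCH_PIPE_CONTROL_DWORDS)

/* base cmd + MI_NOOP = 2 dwords; the EMIT_INVALIDATE delay adds 20 more */
_Static_assert(SKETCH_INVALIDATE_DWORDS == 20,
	       "delay sequence must match the extra ring space reserved");
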
16 changes: 7 additions & 9 deletions drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -2748,6 +2748,12 @@ static const struct i915_power_well_desc icl_power_wells[] = {
 			.hsw.has_fuses = true,
 		},
 	},
+	{
+		.name = "DC off",
+		.domains = ICL_DISPLAY_DC_OFF_POWER_DOMAINS,
+		.ops = &gen9_dc_off_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+	},
 	{
 		.name = "power well 2",
 		.domains = ICL_PW_2_POWER_DOMAINS,
@@ -2759,12 +2765,6 @@ static const struct i915_power_well_desc icl_power_wells[] = {
 			.hsw.has_fuses = true,
 		},
 	},
-	{
-		.name = "DC off",
-		.domains = ICL_DISPLAY_DC_OFF_POWER_DOMAINS,
-		.ops = &gen9_dc_off_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-	},
 	{
 		.name = "power well 3",
 		.domains = ICL_PW_3_POWER_DOMAINS,
@@ -3176,8 +3176,7 @@ static u8 intel_dbuf_max_slices(struct drm_i915_private *dev_priv)
 void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
 			    u8 req_slices)
 {
-	u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices;
-	u32 val;
+	const u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices;
 	bool ret;

 	if (req_slices > intel_dbuf_max_slices(dev_priv)) {
@@ -3188,7 +3187,6 @@ void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
 	if (req_slices == hw_enabled_slices || req_slices == 0)
 		return;

-	val = I915_READ(DBUF_CTL_S2);
 	if (req_slices > hw_enabled_slices)
 		ret = intel_dbuf_slice_set(dev_priv, DBUF_CTL_S2, true);
 	else