Merge tag 'drm-intel-next-fixes-2015-10-22' of git://anongit.freedesk…

…top.org/drm-intel into drm-next Bunch of -fixes for 4.4. Well not just, I've left the mmio/register work from Ville in here since it's low-risk but lots of churn all over. * tag 'drm-intel-next-fixes-2015-10-22' of git://anongit.freedesktop.org/drm-intel: (23 commits) drm/i915: Use round to closest when computing the CEA 1.001 pixel clocks drm/i915: Kill the leftover RMW from ivb_sprite_disable() drm/i915: restore ggtt double-bind avoidance drm/i915/skl: Enable pipe gamma for sprite planes. drm/i915/skl+: Enable pipe CSC on cursor planes. (v2) MAINTAINERS: add link to the Intel Graphics for Linux web site drm/i915: Move skl/bxt gt specific workarounds to ring init drm/i915: Drop i915_gem_obj_is_pinned() from set-cache-level drm/i915: revert a few more watermark commits drm/i915: Remove dev_priv argument from NEEDS_FORCE_WAKE drm/i915: Clean up LVDS register handling drm/i915: Throw out some useless variables drm/i915: Parametrize and fix SWF registers drm/i915: s/PIPE_FRMCOUNT_GM45/PIPE_FRMCOUNT_G4X/ etc. drm/i915: Turn GEN5_ASSERT_IIR_IS_ZERO() into a function drm/i915: Fix a few bad hex numbers in register defines drm/i915: Protect register macro arguments drm/i915: Include gpio_mmio_base in GMBUS reg defines drm/i915: Parametrize HSW video DIP data registers drm/i915: Eliminate weird parameter inversion from BXT PPS registers ...
mariux64 · Oct 29, 2015 · 974e59b · 974e59b
2 parents d7e1bc3 + 606bb5e
commit 974e59b
Show file tree

Hide file tree

Showing 25 changed files with 792 additions and 639 deletions.
diff --git a/MAINTAINERS b/MAINTAINERS
@@ -3584,6 +3584,7 @@ M:	Daniel Vetter <daniel.vetter@intel.com>
 M:	Jani Nikula <jani.nikula@linux.intel.com>
 L:	intel-gfx@lists.freedesktop.org
 L:	dri-devel@lists.freedesktop.org
+W:	https://01.org/linuxgraphics/
 Q:	http://patchwork.freedesktop.org/project/intel-gfx/
 T:	git git://anongit.freedesktop.org/drm-intel
 S:	Supported

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1850,7 +1850,7 @@ static int i915_opregion(struct seq_file *m, void *unused)
 		goto out;
 
 	if (opregion->header) {
-		memcpy_fromio(data, opregion->header, OPREGION_SIZE);
+		memcpy(data, opregion->header, OPREGION_SIZE);
 		seq_write(m, data, OPREGION_SIZE);
 	}
 

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
@@ -450,14 +450,14 @@ struct opregion_swsci;
 struct opregion_asle;
 
 struct intel_opregion {
-	struct opregion_header __iomem *header;
-	struct opregion_acpi __iomem *acpi;
-	struct opregion_swsci __iomem *swsci;
+	struct opregion_header *header;
+	struct opregion_acpi *acpi;
+	struct opregion_swsci *swsci;
 	u32 swsci_gbda_sub_functions;
 	u32 swsci_sbcb_sub_functions;
-	struct opregion_asle __iomem *asle;
-	void __iomem *vbt;
-	u32 __iomem *lid_state;
+	struct opregion_asle *asle;
+	void *vbt;
+	u32 *lid_state;
 	struct work_struct asle_work;
 };
 #define OPREGION_SIZE            (8*1024)
@@ -628,6 +628,10 @@ struct drm_i915_display_funcs {
 			  struct dpll *match_clock,
 			  struct dpll *best_clock);
 	void (*update_wm)(struct drm_crtc *crtc);
+	void (*update_sprite_wm)(struct drm_plane *plane,
+				 struct drm_crtc *crtc,
+				 uint32_t sprite_width, uint32_t sprite_height,
+				 int pixel_size, bool enable, bool scaled);
 	int (*modeset_calc_cdclk)(struct drm_atomic_state *state);
 	void (*modeset_commit_cdclk)(struct drm_atomic_state *state);
 	/* Returns the active state of the crtc, and if the crtc is active,
@@ -1031,7 +1035,7 @@ struct i915_suspend_saved_registers {
 	u32 saveMI_ARB_STATE;
 	u32 saveSWF0[16];
 	u32 saveSWF1[16];
-	u32 saveSWF2[3];
+	u32 saveSWF3[3];
 	uint64_t saveFENCE[I915_MAX_NUM_FENCES];
 	u32 savePCH_PORT_HOTPLUG;
 	u16 saveGCDGMBUS;

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
@@ -3657,60 +3657,117 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	return 0;
 }
 
+/**
+ * Changes the cache-level of an object across all VMA.
+ *
+ * After this function returns, the object will be in the new cache-level
+ * across all GTT and the contents of the backing storage will be coherent,
+ * with respect to the new cache-level. In order to keep the backing storage
+ * coherent for all users, we only allow a single cache level to be set
+ * globally on the object and prevent it from being changed whilst the
+ * hardware is reading from the object. That is if the object is currently
+ * on the scanout it will be set to uncached (or equivalent display
+ * cache coherency) and all non-MOCS GPU access will also be uncached so
+ * that all direct access to the scanout remains coherent.
+ */
 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 				    enum i915_cache_level cache_level)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct i915_vma *vma, *next;
+	bool bound = false;
 	int ret = 0;
 
 	if (obj->cache_level == cache_level)
 		goto out;
 
-	if (i915_gem_obj_is_pinned(obj)) {
-		DRM_DEBUG("can not change the cache level of pinned objects\n");
-		return -EBUSY;
-	}
-
+	/* Inspect the list of currently bound VMA and unbind any that would
+	 * be invalid given the new cache-level. This is principally to
+	 * catch the issue of the CS prefetch crossing page boundaries and
+	 * reading an invalid PTE on older architectures.
+	 */
 	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
+		if (!drm_mm_node_allocated(&vma->node))
+			continue;
+
+		if (vma->pin_count) {
+			DRM_DEBUG("can not change the cache level of pinned objects\n");
+			return -EBUSY;
+		}
+
 		if (!i915_gem_valid_gtt_space(vma, cache_level)) {
 			ret = i915_vma_unbind(vma);
 			if (ret)
 				return ret;
-		}
+		} else
+			bound = true;
 	}
 
-	if (i915_gem_obj_bound_any(obj)) {
+	/* We can reuse the existing drm_mm nodes but need to change the
+	 * cache-level on the PTE. We could simply unbind them all and
+	 * rebind with the correct cache-level on next use. However since
+	 * we already have a valid slot, dma mapping, pages etc, we may as
+	 * rewrite the PTE in the belief that doing so tramples upon less
+	 * state and so involves less work.
+	 */
+	if (bound) {
+		/* Before we change the PTE, the GPU must not be accessing it.
+		 * If we wait upon the object, we know that all the bound
+		 * VMA are no longer active.
+		 */
 		ret = i915_gem_object_wait_rendering(obj, false);
 		if (ret)
 			return ret;
 
-		i915_gem_object_finish_gtt(obj);
-
-		/* Before SandyBridge, you could not use tiling or fence
-		 * registers with snooped memory, so relinquish any fences
-		 * currently pointing to our region in the aperture.
-		 */
-		if (INTEL_INFO(dev)->gen < 6) {
+		if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) {
+			/* Access to snoopable pages through the GTT is
+			 * incoherent and on some machines causes a hard
+			 * lockup. Relinquish the CPU mmaping to force
+			 * userspace to refault in the pages and we can
+			 * then double check if the GTT mapping is still
+			 * valid for that pointer access.
+			 */
+			i915_gem_release_mmap(obj);
+
+			/* As we no longer need a fence for GTT access,
+			 * we can relinquish it now (and so prevent having
+			 * to steal a fence from someone else on the next
+			 * fence request). Note GPU activity would have
+			 * dropped the fence as all snoopable access is
+			 * supposed to be linear.
+			 */
 			ret = i915_gem_object_put_fence(obj);
 			if (ret)
 				return ret;
+		} else {
+			/* We either have incoherent backing store and
+			 * so no GTT access or the architecture is fully
+			 * coherent. In such cases, existing GTT mmaps
+			 * ignore the cache bit in the PTE and we can
+			 * rewrite it without confusing the GPU or having
+			 * to force userspace to fault back in its mmaps.
+			 */
 		}
 
-		list_for_each_entry(vma, &obj->vma_list, vma_link)
-			if (drm_mm_node_allocated(&vma->node)) {
-				ret = i915_vma_bind(vma, cache_level,
-						    PIN_UPDATE);
-				if (ret)
-					return ret;
-			}
+		list_for_each_entry(vma, &obj->vma_list, vma_link) {
+			if (!drm_mm_node_allocated(&vma->node))
+				continue;
+
+			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
+			if (ret)
+				return ret;
+		}
 	}
 
 	list_for_each_entry(vma, &obj->vma_list, vma_link)
 		vma->node.color = cache_level;
 	obj->cache_level = cache_level;
 
 out:
+	/* Flush the dirty CPU caches to the backing storage so that the
+	 * object is now coherent at its new cache level (with respect
+	 * to the access domain).
+	 */
 	if (obj->cache_dirty &&
 	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
 	    cpu_write_needs_clflush(obj)) {

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2501,6 +2501,36 @@ static void i915_ggtt_clear_range(struct i915_address_space *vm,
 static int ggtt_bind_vma(struct i915_vma *vma,
 			 enum i915_cache_level cache_level,
 			 u32 flags)
+{
+	struct drm_i915_gem_object *obj = vma->obj;
+	u32 pte_flags = 0;
+	int ret;
+
+	ret = i915_get_ggtt_vma_pages(vma);
+	if (ret)
+		return ret;
+
+	/* Currently applicable only to VLV */
+	if (obj->gt_ro)
+		pte_flags |= PTE_READ_ONLY;
+
+	vma->vm->insert_entries(vma->vm, vma->ggtt_view.pages,
+				vma->node.start,
+				cache_level, pte_flags);
+
+	/*
+	 * Without aliasing PPGTT there's no difference between
+	 * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
+	 * upgrade to both bound if we bind either to avoid double-binding.
+	 */
+	vma->bound |= GLOBAL_BIND | LOCAL_BIND;
+
+	return 0;
+}
+
+static int aliasing_gtt_bind_vma(struct i915_vma *vma,
+				 enum i915_cache_level cache_level,
+				 u32 flags)
 {
 	struct drm_device *dev = vma->vm->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -2519,23 +2549,13 @@ static int ggtt_bind_vma(struct i915_vma *vma,
 		pte_flags |= PTE_READ_ONLY;
 
 
-	if (!dev_priv->mm.aliasing_ppgtt || flags & GLOBAL_BIND) {
+	if (flags & GLOBAL_BIND) {
 		vma->vm->insert_entries(vma->vm, pages,
 					vma->node.start,
 					cache_level, pte_flags);
-
-		/* Note the inconsistency here is due to absence of the
-		 * aliasing ppgtt on gen4 and earlier. Though we always
-		 * request PIN_USER for execbuffer (translated to LOCAL_BIND),
-		 * without the appgtt, we cannot honour that request and so
-		 * must substitute it with a global binding. Since we do this
-		 * behind the upper layers back, we need to explicitly set
-		 * the bound flag ourselves.
-		 */
-		vma->bound |= GLOBAL_BIND;
 	}
 
-	if (dev_priv->mm.aliasing_ppgtt && flags & LOCAL_BIND) {
+	if (flags & LOCAL_BIND) {
 		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
 		appgtt->base.insert_entries(&appgtt->base, pages,
 					    vma->node.start,
@@ -2699,6 +2719,8 @@ static int i915_gem_setup_global_gtt(struct drm_device *dev,
 					true);
 
 		dev_priv->mm.aliasing_ppgtt = ppgtt;
+		WARN_ON(dev_priv->gtt.base.bind_vma != ggtt_bind_vma);
+		dev_priv->gtt.base.bind_vma = aliasing_gtt_bind_vma;
 	}
 
 	return 0;

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
@@ -139,27 +139,30 @@ static const u32 hpd_bxt[HPD_NUM_PINS] = {
 /*
  * We should clear IMR at preinstall/uninstall, and just check at postinstall.
  */
-#define GEN5_ASSERT_IIR_IS_ZERO(reg) do { \
-	u32 val = I915_READ(reg); \
-	if (val) { \
-		WARN(1, "Interrupt register 0x%x is not zero: 0x%08x\n", \
-		     (reg), val); \
-		I915_WRITE((reg), 0xffffffff); \
-		POSTING_READ(reg); \
-		I915_WRITE((reg), 0xffffffff); \
-		POSTING_READ(reg); \
-	} \
-} while (0)
+static void gen5_assert_iir_is_zero(struct drm_i915_private *dev_priv, u32 reg)
+{
+	u32 val = I915_READ(reg);
+
+	if (val == 0)
+		return;
+
+	WARN(1, "Interrupt register 0x%x is not zero: 0x%08x\n",
+	     reg, val);
+	I915_WRITE(reg, 0xffffffff);
+	POSTING_READ(reg);
+	I915_WRITE(reg, 0xffffffff);
+	POSTING_READ(reg);
+}
 
 #define GEN8_IRQ_INIT_NDX(type, which, imr_val, ier_val) do { \
-	GEN5_ASSERT_IIR_IS_ZERO(GEN8_##type##_IIR(which)); \
+	gen5_assert_iir_is_zero(dev_priv, GEN8_##type##_IIR(which)); \
 	I915_WRITE(GEN8_##type##_IER(which), (ier_val)); \
 	I915_WRITE(GEN8_##type##_IMR(which), (imr_val)); \
 	POSTING_READ(GEN8_##type##_IMR(which)); \
 } while (0)
 
 #define GEN5_IRQ_INIT(type, imr_val, ier_val) do { \
-	GEN5_ASSERT_IIR_IS_ZERO(type##IIR); \
+	gen5_assert_iir_is_zero(dev_priv, type##IIR); \
 	I915_WRITE(type##IER, (ier_val)); \
 	I915_WRITE(type##IMR, (imr_val)); \
 	POSTING_READ(type##IMR); \
@@ -707,12 +710,11 @@ static u32 i915_get_vblank_counter(struct drm_device *dev, unsigned int pipe)
 	return (((high1 << 8) | low) + (pixel >= vbl_start)) & 0xffffff;
 }
 
-static u32 gm45_get_vblank_counter(struct drm_device *dev, unsigned int pipe)
+static u32 g4x_get_vblank_counter(struct drm_device *dev, unsigned int pipe)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	int reg = PIPE_FRMCOUNT_GM45(pipe);
 
-	return I915_READ(reg);
+	return I915_READ(PIPE_FRMCOUNT_G4X(pipe));
 }
 
 /* raw reads, only for fast reads of display block, no need for forcewake etc. */
@@ -3365,7 +3367,7 @@ static void ibx_irq_postinstall(struct drm_device *dev)
 	else
 		mask = SDE_GMBUS_CPT | SDE_AUX_MASK_CPT;
 
-	GEN5_ASSERT_IIR_IS_ZERO(SDEIIR);
+	gen5_assert_iir_is_zero(dev_priv, SDEIIR);
 	I915_WRITE(SDEIMR, ~mask);
 }
 
@@ -4397,7 +4399,7 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
 		dev->driver->get_vblank_counter = i8xx_get_vblank_counter;
 	} else if (IS_G4X(dev_priv) || INTEL_INFO(dev_priv)->gen >= 5) {
 		dev->max_vblank_count = 0xffffffff; /* full 32 bit counter */
-		dev->driver->get_vblank_counter = gm45_get_vblank_counter;
+		dev->driver->get_vblank_counter = g4x_get_vblank_counter;
 	} else {
 		dev->driver->get_vblank_counter = i915_get_vblank_counter;
 		dev->max_vblank_count = 0xffffff; /* only 24 bits of frame count */