From 3b25b31fd15ed5a25822b450757dc33bc2d6b63b Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 14 Feb 2014 14:06:06 +0100 Subject: [PATCH 001/107] drm/i915: tune down user-triggerable dmesg noise in the cursor/overlay code Spotted while auditing the code for fencing issues. Cc: Chris Wilson Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 10 +++++----- drivers/gpu/drm/i915/intel_overlay.c | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e0b7d06687f71..c559c58af4831 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -7549,7 +7549,7 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc, return -ENOENT; if (obj->base.size < width * height * 4) { - DRM_ERROR("buffer is to small\n"); + DRM_DEBUG_KMS("buffer is to small\n"); ret = -ENOMEM; goto fail; } @@ -7560,7 +7560,7 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc, unsigned alignment; if (obj->tiling_mode) { - DRM_ERROR("cursor cannot be tiled\n"); + DRM_DEBUG_KMS("cursor cannot be tiled\n"); ret = -EINVAL; goto fail_locked; } @@ -7576,13 +7576,13 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc, ret = i915_gem_object_pin_to_display_plane(obj, alignment, NULL); if (ret) { - DRM_ERROR("failed to move cursor bo into the GTT\n"); + DRM_DEBUG_KMS("failed to move cursor bo into the GTT\n"); goto fail_locked; } ret = i915_gem_object_put_fence(obj); if (ret) { - DRM_ERROR("failed to release fence for cursor"); + DRM_DEBUG_KMS("failed to release fence for cursor"); goto fail_unpin; } @@ -7593,7 +7593,7 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc, (intel_crtc->pipe == 0) ? I915_GEM_PHYS_CURSOR_0 : I915_GEM_PHYS_CURSOR_1, align); if (ret) { - DRM_ERROR("failed to attach phys object\n"); + DRM_DEBUG_KMS("failed to attach phys object\n"); goto fail_locked; } addr = obj->phys_obj->handle->busaddr; diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index ac519cb46f22a..312961a8472e3 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -1076,7 +1076,7 @@ int intel_overlay_put_image(struct drm_device *dev, void *data, mutex_lock(&dev->struct_mutex); if (new_bo->tiling_mode) { - DRM_ERROR("buffer used for overlay image can not be tiled\n"); + DRM_DEBUG_KMS("buffer used for overlay image can not be tiled\n"); ret = -EINVAL; goto out_unlock; } From b5ea642a76ee0884a4d378b4d5fe290ddb461524 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Sun, 2 Mar 2014 21:18:00 +0100 Subject: [PATCH 002/107] drm/i915: sprinkle static Apparently we've missed a few more than what Fengguang's 0-day tester recently reported in i915_irq.c ... Makes sparse happy again (ignore some spurious stuff about ksyms of exported functions). Cc: kbuild test robot Cc: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_irq.c | 4 ++-- drivers/gpu/drm/i915/intel_display.c | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 69f2ebbd6af1b..484415bb34974 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -482,7 +482,7 @@ bool intel_set_pch_fifo_underrun_reporting(struct drm_device *dev, } -void +static void __i915_enable_pipestat(struct drm_i915_private *dev_priv, enum pipe pipe, u32 enable_mask, u32 status_mask) { @@ -506,7 +506,7 @@ __i915_enable_pipestat(struct drm_i915_private *dev_priv, enum pipe pipe, POSTING_READ(reg); } -void +static void __i915_disable_pipestat(struct drm_i915_private *dev_priv, enum pipe pipe, u32 enable_mask, u32 status_mask) { diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index c559c58af4831..863500893e96b 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -7692,7 +7692,7 @@ __intel_framebuffer_create(struct drm_device *dev, return ERR_PTR(ret); } -struct drm_framebuffer * +static struct drm_framebuffer * intel_framebuffer_create(struct drm_device *dev, struct drm_mode_fb_cmd2 *mode_cmd, struct drm_i915_gem_object *obj) @@ -10541,10 +10541,10 @@ static const struct drm_framebuffer_funcs intel_fb_funcs = { .create_handle = intel_user_framebuffer_create_handle, }; -int intel_framebuffer_init(struct drm_device *dev, - struct intel_framebuffer *intel_fb, - struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_i915_gem_object *obj) +static int intel_framebuffer_init(struct drm_device *dev, + struct intel_framebuffer *intel_fb, + struct drm_mode_fb_cmd2 *mode_cmd, + struct drm_i915_gem_object *obj) { int aligned_height; int pitch_limit; From d4d5be6192caaee39e40ea7a7017316237ae195f Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 21 Feb 2014 08:55:27 +0100 Subject: [PATCH 003/107] drm/i915: Remove dead code The i915 driver sets DRIVER_GEM unconditionally, so testing for the feature will always fail. Signed-off-by: Thierry Reding [danvet: Fix up conflicts.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_context.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index f8c21a6dd6635..da74522f377da 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -746,9 +746,6 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, struct i915_hw_context *ctx; int ret; - if (!(dev->driver->driver_features & DRIVER_GEM)) - return -ENODEV; - if (!HAS_HW_CONTEXTS(dev)) return -ENODEV; @@ -775,9 +772,6 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, struct i915_hw_context *ctx; int ret; - if (!(dev->driver->driver_features & DRIVER_GEM)) - return -ENODEV; - if (args->ctx_id == DEFAULT_CONTEXT_ID) return -ENOENT; From da7235692c5be122a776d04ab18e591fbc73f13c Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Thu, 19 Dec 2013 11:54:51 -0200 Subject: [PATCH 004/107] drm/i915: rename modeset_update_power_wells To modeset_update_crtc_power_domains, since this function is responsible for updating all the power domains of all CRTCs after a modeset. In the future we should also run this function on all platforms, not just Haswell. Signed-off-by: Paulo Zanoni Reviewed-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 863500893e96b..8d316c8bdf8fa 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -6873,7 +6873,7 @@ void intel_display_set_init_power(struct drm_device *dev, bool enable) dev_priv->power_domains.init_power_on = enable; } -static void modeset_update_power_wells(struct drm_device *dev) +static void modeset_update_crtc_power_domains(struct drm_device *dev) { unsigned long pipe_domains[I915_MAX_PIPES] = { 0, }; struct intel_crtc *crtc; @@ -6910,7 +6910,7 @@ static void modeset_update_power_wells(struct drm_device *dev) static void haswell_modeset_global_resources(struct drm_device *dev) { - modeset_update_power_wells(dev); + modeset_update_crtc_power_domains(dev); hsw_update_package_c8(dev); } From 5bfa0199e95220e10d57204b856f0a361270fefe Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Thu, 19 Dec 2013 11:54:52 -0200 Subject: [PATCH 005/107] drm/i915: get/put runtime PM without holding rps.hw_lock We'll need this when we merge PC8 and Runtime PM: the PC8 enable/disable functions need that lock. Also, it's good practice to not hold a lock for longer than strictly needed. Signed-off-by: Paulo Zanoni Reviewed-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index d90a70744d936..92dd2067fb08e 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1468,7 +1468,7 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) struct drm_info_node *node = (struct drm_info_node *) m->private; struct drm_device *dev = node->minor->dev; drm_i915_private_t *dev_priv = dev->dev_private; - int ret; + int ret = 0; int gpu_freq, ia_freq; if (!(IS_GEN6(dev) || IS_GEN7(dev))) { @@ -1476,12 +1476,13 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) return 0; } + intel_runtime_pm_get(dev_priv); + flush_delayed_work(&dev_priv->rps.delayed_resume_work); ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock); if (ret) - return ret; - intel_runtime_pm_get(dev_priv); + goto out; seq_puts(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\tEffective Ring freq (MHz)\n"); @@ -1498,10 +1499,11 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) ((ia_freq >> 8) & 0xff) * 100); } - intel_runtime_pm_put(dev_priv); mutex_unlock(&dev_priv->rps.hw_lock); - return 0; +out: + intel_runtime_pm_put(dev_priv); + return ret; } static int i915_gfxec(struct seq_file *m, void *unused) From 8d85d27281095e4df6eb97ae84326b5814337337 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 4 Feb 2014 21:59:15 +0200 Subject: [PATCH 006/107] drm/i915: Fix SNB GT_MODE register setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On SNB we set up WaSetupGtModeTdRowDispatch:snb early in gen6_init_clock_gating(). That sets a bit in the GEN6_GT_MODE register. However later we go and disable all the bits in the same register. And then we go on to set some other bit. So apparently we never actually implemented this workaround since the "disable all bits" part was there already before the w/a got supposedly implemented. These are the relevant commits: commit 6547fbdbfff62c99e4f7b4f985ff8b3454f33b0f Author: Daniel Vetter Date: Fri Dec 14 23:38:29 2012 +0100 drm/i915: Implement WaSetupGtModeTdRowDispatch commit f8f2ac9a76b0f80a6763ca316116a7bab8486997 Author: Ben Widawsky Date: Wed Oct 3 19:34:24 2012 -0700 drm/i915: Fix GT_MODE default value So, let's drop the "disable all bits" part, move both writes to closer proxomity to each other, and name the WIZ hashing bits appropriately. BSpec is still a bit confused how the bits should actually be interpreted, but I took the the description for the high bit since the low bit part only lists values for a single bit. Also add a comment about our choice of WIZ hashing mode. Signed-off-by: Ville Syrjälä Reviewed-by: Antti Koskipää Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 6 +++++- drivers/gpu/drm/i915/intel_pm.c | 12 +++++++----- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 2f564ce37d2cb..071c17d408c85 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -798,7 +798,11 @@ # define ASYNC_FLIP_PERF_DISABLE (1 << 14) #define GEN6_GT_MODE 0x20d0 -#define GEN6_GT_MODE_HI (1 << 9) +#define GEN6_WIZ_HASHING(hi, lo) (((hi) << 9) | ((lo) << 7)) +#define GEN6_WIZ_HASHING_8x8 GEN6_WIZ_HASHING(0, 0) +#define GEN6_WIZ_HASHING_8x4 GEN6_WIZ_HASHING(0, 1) +#define GEN6_WIZ_HASHING_16x4 GEN6_WIZ_HASHING(1, 0) +#define GEN6_WIZ_HASHING_MASK (GEN6_WIZ_HASHING(1, 1) << 16) #define GEN6_TD_FOUR_ROW_DISPATCH_DISABLE (1 << 5) #define GFX_MODE 0x02520 diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index a6b877a4a916e..45dd23f12a6f4 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4661,6 +4661,13 @@ static void gen6_init_clock_gating(struct drm_device *dev) I915_WRITE(GEN6_GT_MODE, _MASKED_BIT_ENABLE(GEN6_TD_FOUR_ROW_DISPATCH_DISABLE)); + /* + * BSpec recoomends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + */ + I915_WRITE(GEN6_GT_MODE, + GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); + ilk_init_lp_watermarks(dev); I915_WRITE(CACHE_MODE_0, @@ -4724,11 +4731,6 @@ static void gen6_init_clock_gating(struct drm_device *dev) g4x_disable_trickle_feed(dev); - /* The default value should be 0x200 according to docs, but the two - * platforms I checked have a 0 for this. (Maybe BIOS overrides?) */ - I915_WRITE(GEN6_GT_MODE, _MASKED_BIT_DISABLE(0xffff)); - I915_WRITE(GEN6_GT_MODE, _MASKED_BIT_ENABLE(GEN6_GT_MODE_HI)); - cpt_init_clock_gating(dev); gen6_check_mch_setup(dev); From 5eb146dd0b092b79f2c2c5449e7e78e94c076ff9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 4 Feb 2014 21:59:16 +0200 Subject: [PATCH 007/107] drm/i915: Assume we implement WaStripsFansDisableFastClipPerformanceFix:snb MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Based on the name, the workaround we implement is WaStripsFansDisableFastClipPerformanceFix. Unfortunately there's no description in the w/a database, so this is just a guess. Signed-off-by: Ville Syrjälä Reviewed-by: Antti Koskipää Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 45dd23f12a6f4..f4ac8f324b1c5 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4695,6 +4695,7 @@ static void gen6_init_clock_gating(struct drm_device *dev) GEN6_RCPBUNIT_CLOCK_GATE_DISABLE | GEN6_RCCUNIT_CLOCK_GATE_DISABLE); + /* WaStripsFansDisableFastClipPerformanceFix:snb */ /* Bspec says we need to always set all mask bits. */ I915_WRITE(_3D_CHICKEN3, (0xFFFF << 16) | _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL); From 743b57d830b8834026508050bd138c1247fccd4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 4 Feb 2014 21:59:17 +0200 Subject: [PATCH 008/107] drm/i915: There's no need to mask all 3D_CHICKEN bits on SNB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The need to set all of the mask bits for 3D_CHICKEN3 was required only for pre-production hardware. Signed-off-by: Ville Syrjälä Reviewed-by: Antti Koskipää Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index f4ac8f324b1c5..08c1a756cdadb 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4696,9 +4696,8 @@ static void gen6_init_clock_gating(struct drm_device *dev) GEN6_RCCUNIT_CLOCK_GATE_DISABLE); /* WaStripsFansDisableFastClipPerformanceFix:snb */ - /* Bspec says we need to always set all mask bits. */ - I915_WRITE(_3D_CHICKEN3, (0xFFFF << 16) | - _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL); + I915_WRITE(_3D_CHICKEN3, + _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL)); /* * Bspec says: From a607c1a41d7dae073d6b79460b26e818c772984e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 4 Feb 2014 21:59:19 +0200 Subject: [PATCH 009/107] drm/i915: Change IVB WIZ hashing mode to 16x4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BSpec recommends using 8x4 hashing mode when MSAA is used. But in practice 16x4 seems to have a slight edge in performance (on IVB and HSW at least). So just use 16x4. Signed-off-by: Ville Syrjälä Reviewed-by: Antti Koskipää Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 071c17d408c85..5e832b1fd9d62 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -798,6 +798,7 @@ # define ASYNC_FLIP_PERF_DISABLE (1 << 14) #define GEN6_GT_MODE 0x20d0 +#define GEN7_GT_MODE 0x7008 #define GEN6_WIZ_HASHING(hi, lo) (((hi) << 9) | ((lo) << 7)) #define GEN6_WIZ_HASHING_8x8 GEN6_WIZ_HASHING(0, 0) #define GEN6_WIZ_HASHING_8x4 GEN6_WIZ_HASHING(0, 1) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 08c1a756cdadb..57101f2321fd8 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4954,6 +4954,13 @@ static void ivybridge_init_clock_gating(struct drm_device *dev) I915_WRITE(CACHE_MODE_1, _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); + /* + * BSpec recommends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + */ + I915_WRITE(GEN7_GT_MODE, + GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); + snpcr = I915_READ(GEN6_MBCUNIT_SNPCR); snpcr &= ~GEN6_MBC_SNPCR_MASK; snpcr |= GEN6_MBC_SNPCR_MED; From a12c4967c96a41cbfc95a8cf8bc7bd697d9df054 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 4 Feb 2014 21:59:20 +0200 Subject: [PATCH 010/107] drm/i915: Change HSW WIZ hashing mode to 16x4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BSpec recommends using 8x4 hashing mode when MSAA is used. But in practice 16x4 seems to have a slight edge in performance (on IVB and HSW at least). So just use 16x4. Signed-off-by: Ville Syrjälä Reviewed-by: Antti Koskipää Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 57101f2321fd8..7da7360fea359 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4873,6 +4873,13 @@ static void haswell_init_clock_gating(struct drm_device *dev) I915_WRITE(CACHE_MODE_1, _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); + /* + * BSpec recommends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + */ + I915_WRITE(GEN7_GT_MODE, + GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); + /* WaSwitchSolVfFArbitrationPriority:hsw */ I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); From 36075a4cad5adab51a97f32abf41db00975cabd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 4 Feb 2014 21:59:21 +0200 Subject: [PATCH 011/107] drm/i915: Change BDW WIZ hashing mode to 16x4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BSpec recommends using 8x4 hashing mode when MSAA is used. But in practice 16x4 seems to have a slight edge in performance (on IVB and HSW at least). So just use 16x4. Signed-off-by: Ville Syrjälä Reviewed-by: Antti Koskipää Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 7da7360fea359..151afe53cc7cf 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4843,6 +4843,13 @@ static void gen8_init_clock_gating(struct drm_device *dev) I915_WRITE(GEN7_FF_THREAD_MODE, I915_READ(GEN7_FF_THREAD_MODE) & ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME)); + + /* + * BSpec recommends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + */ + I915_WRITE(GEN7_GT_MODE, + GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); } static void haswell_init_clock_gating(struct drm_device *dev) From c5c98a58990c1b2cf4d94b6759bed239976e5305 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 5 Feb 2014 12:43:47 +0200 Subject: [PATCH 012/107] drm/i915: Add a comment about WIZ hashing vs. thread counts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a comment next to our WIZ hashing setup to remind people about the link between WIZ hashing disable bit and PS/WM thread counts. Suggested-by: Chris Wilson Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 151afe53cc7cf..3e754fec58b5d 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4664,6 +4664,10 @@ static void gen6_init_clock_gating(struct drm_device *dev) /* * BSpec recoomends 8x4 when MSAA is used, * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). */ I915_WRITE(GEN6_GT_MODE, GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); @@ -4847,6 +4851,10 @@ static void gen8_init_clock_gating(struct drm_device *dev) /* * BSpec recommends 8x4 when MSAA is used, * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). */ I915_WRITE(GEN7_GT_MODE, GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); @@ -4883,6 +4891,10 @@ static void haswell_init_clock_gating(struct drm_device *dev) /* * BSpec recommends 8x4 when MSAA is used, * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). */ I915_WRITE(GEN7_GT_MODE, GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); @@ -4971,6 +4983,10 @@ static void ivybridge_init_clock_gating(struct drm_device *dev) /* * BSpec recommends 8x4 when MSAA is used, * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). */ I915_WRITE(GEN7_GT_MODE, GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); From 321f2ada91ef142894f165814fc196e0cb168262 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Thu, 20 Feb 2014 11:47:06 -0800 Subject: [PATCH 013/107] drm/i915: Move ppgtt_release out of the header At one time it was expected to be called in multiple places by kref_put. At the current time however, it is all contained within i915_gem_context.c. This patch makes an upcoming required addition a bit nicer since it too doesn't need to be defined in a header file. Signed-off-by: Ben Widawsky Reviewed-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 36 ------------------------- drivers/gpu/drm/i915/i915_gem_context.c | 36 +++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b4587ac4053a5..f732433ea75e3 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2379,42 +2379,6 @@ static inline bool intel_enable_ppgtt(struct drm_device *dev, bool full) return HAS_ALIASING_PPGTT(dev); } -static inline void ppgtt_release(struct kref *kref) -{ - struct i915_hw_ppgtt *ppgtt = container_of(kref, struct i915_hw_ppgtt, ref); - struct drm_device *dev = ppgtt->base.dev; - struct drm_i915_private *dev_priv = dev->dev_private; - struct i915_address_space *vm = &ppgtt->base; - - if (ppgtt == dev_priv->mm.aliasing_ppgtt || - (list_empty(&vm->active_list) && list_empty(&vm->inactive_list))) { - ppgtt->base.cleanup(&ppgtt->base); - return; - } - - /* - * Make sure vmas are unbound before we take down the drm_mm - * - * FIXME: Proper refcounting should take care of this, this shouldn't be - * needed at all. - */ - if (!list_empty(&vm->active_list)) { - struct i915_vma *vma; - - list_for_each_entry(vma, &vm->active_list, mm_list) - if (WARN_ON(list_empty(&vma->vma_link) || - list_is_singular(&vma->vma_link))) - break; - - i915_gem_evict_vm(&ppgtt->base, true); - } else { - i915_gem_retire_requests(dev); - i915_gem_evict_vm(&ppgtt->base, false); - } - - ppgtt->base.cleanup(&ppgtt->base); -} - /* i915_gem_stolen.c */ int i915_gem_init_stolen(struct drm_device *dev); int i915_gem_stolen_setup_compression(struct drm_device *dev, int size); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index da74522f377da..0785ddbb3d465 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -99,6 +99,42 @@ static int do_switch(struct intel_ring_buffer *ring, struct i915_hw_context *to); +static void ppgtt_release(struct kref *kref) +{ + struct i915_hw_ppgtt *ppgtt = container_of(kref, struct i915_hw_ppgtt, ref); + struct drm_device *dev = ppgtt->base.dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct i915_address_space *vm = &ppgtt->base; + + if (ppgtt == dev_priv->mm.aliasing_ppgtt || + (list_empty(&vm->active_list) && list_empty(&vm->inactive_list))) { + ppgtt->base.cleanup(&ppgtt->base); + return; + } + + /* + * Make sure vmas are unbound before we take down the drm_mm + * + * FIXME: Proper refcounting should take care of this, this shouldn't be + * needed at all. + */ + if (!list_empty(&vm->active_list)) { + struct i915_vma *vma; + + list_for_each_entry(vma, &vm->active_list, mm_list) + if (WARN_ON(list_empty(&vma->vma_link) || + list_is_singular(&vma->vma_link))) + break; + + i915_gem_evict_vm(&ppgtt->base, true); + } else { + i915_gem_retire_requests(dev); + i915_gem_evict_vm(&ppgtt->base, false); + } + + ppgtt->base.cleanup(&ppgtt->base); +} + static size_t get_context_alignment(struct drm_device *dev) { if (IS_GEN6(dev)) From b18b6bde300e1abe30e8b27501411a4b4a95ffeb Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Thu, 20 Feb 2014 11:47:07 -0800 Subject: [PATCH 014/107] drm/i915/bdw: Free PPGTT struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GEN8 never freed the PPGTT struct. As GEN8 doesn't use full PPGTT, the leak is small and only found on a module reload. ie. I don't think this needs to go to stable. v2: The very naive, kfree in gen8 ppgtt cleanup, is subject to a double free on PPGTT initialization failure. (Spotted by Imre). Instead this patch pulls the ppgtt struct freeing out of the cleanup and leaves it to the allocators/callers or the one doing the last kref_put as in standard convention Reported-by: Ville Syrjälä Signed-off-by: Ben Widawsky Reviewed-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_context.c | 12 ++++++++++-- drivers/gpu/drm/i915/i915_gem_gtt.c | 1 - 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 0785ddbb3d465..d194f5084edfa 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -99,9 +99,8 @@ static int do_switch(struct intel_ring_buffer *ring, struct i915_hw_context *to); -static void ppgtt_release(struct kref *kref) +static void do_ppgtt_cleanup(struct i915_hw_ppgtt *ppgtt) { - struct i915_hw_ppgtt *ppgtt = container_of(kref, struct i915_hw_ppgtt, ref); struct drm_device *dev = ppgtt->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; struct i915_address_space *vm = &ppgtt->base; @@ -135,6 +134,15 @@ static void ppgtt_release(struct kref *kref) ppgtt->base.cleanup(&ppgtt->base); } +static void ppgtt_release(struct kref *kref) +{ + struct i915_hw_ppgtt *ppgtt = + container_of(kref, struct i915_hw_ppgtt, ref); + + do_ppgtt_cleanup(ppgtt); + kfree(ppgtt); +} + static size_t get_context_alignment(struct drm_device *dev) { if (IS_GEN6(dev)) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 69a88d41a2a7e..49e79fbee45d2 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -859,7 +859,6 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm) for (i = 0; i < ppgtt->num_pd_entries; i++) __free_page(ppgtt->pt_pages[i]); kfree(ppgtt->pt_pages); - kfree(ppgtt); } static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) From f3a964b96d792eac67976a3bb296a0d2049b4f91 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Wed, 19 Feb 2014 22:05:42 -0800 Subject: [PATCH 015/107] drm/i915/bdw: Reorganize PPGTT init Create 3 clear stages in PPGTT init. This will help with upcoming changes be more readable. The 3 stages are, allocation, dma mapping, and writing the P[DT]Es One nice benefit to the patches is that it makes 2 very clear error points, allocation, and mapping, and avoids having to do any handling after writing PTEs (something which was likely buggy before). This simplified error handling I suspect will be helpful when we move to deferred/dynamic page table allocation and mapping. The patches also attempts to break up some of the steps into more logical reviewable chunks, particularly when we free. v2: Don't call cleanup on the error path since that takes down the drm_mm and list entry, which aren't setup at this point. v3: Fixes addressing Imre's comments from: <1392821989.19792.13.camel@intelbox> Don't do dynamic allocation for the page table DMA addresses. I can't remember why I did it in the first place. This addresses one of Imre's other issues. Fix error path leak of page tables. v4: Fix the fix of the error path leak. Original fix still leaked page tables. (Imre) Reviewed-by: Imre Deak Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_gtt.c | 110 +++++++++++++++------------- 1 file changed, 60 insertions(+), 50 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 49e79fbee45d2..2bc07fb447bdb 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -332,6 +332,7 @@ static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt) static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt) { + struct pci_dev *hwdev = ppgtt->base.dev->pdev; int i, j; for (i = 0; i < ppgtt->num_pd_pages; i++) { @@ -340,18 +341,14 @@ static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt) if (!ppgtt->pd_dma_addr[i]) continue; - pci_unmap_page(ppgtt->base.dev->pdev, - ppgtt->pd_dma_addr[i], - PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + pci_unmap_page(hwdev, ppgtt->pd_dma_addr[i], PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL); for (j = 0; j < GEN8_PDES_PER_PAGE; j++) { dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j]; if (addr) - pci_unmap_page(ppgtt->base.dev->pdev, - addr, - PAGE_SIZE, - PCI_DMA_BIDIRECTIONAL); - + pci_unmap_page(hwdev, addr, PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL); } } } @@ -369,27 +366,27 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) } /** - * GEN8 legacy ppgtt programming is accomplished through 4 PDP registers with a - * net effect resembling a 2-level page table in normal x86 terms. Each PDP - * represents 1GB of memory - * 4 * 512 * 512 * 4096 = 4GB legacy 32b address space. + * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers + * with a net effect resembling a 2-level page table in normal x86 terms. Each + * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address + * space. * + * FIXME: split allocation into smaller pieces. For now we only ever do this + * once, but with full PPGTT, the multiple contiguous allocations will be bad. * TODO: Do something with the size parameter - **/ + */ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size) { struct page *pt_pages; - int i, j, ret = -ENOMEM; const int max_pdp = DIV_ROUND_UP(size, 1 << 30); const int num_pt_pages = GEN8_PDES_PER_PAGE * max_pdp; + struct pci_dev *hwdev = ppgtt->base.dev->pdev; + int i, j, ret; if (size % (1<<30)) DRM_INFO("Pages will be wasted unless GTT size (%llu) is divisible by 1GB\n", size); - /* FIXME: split allocation into smaller pieces. For now we only ever do - * this once, but with full PPGTT, the multiple contiguous allocations - * will be bad. - */ + /* 1. Do all our allocations for page directories and page tables */ ppgtt->pd_pages = alloc_pages(GFP_KERNEL, get_order(max_pdp << PAGE_SHIFT)); if (!ppgtt->pd_pages) return -ENOMEM; @@ -404,52 +401,56 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size) ppgtt->num_pd_pages = 1 << get_order(max_pdp << PAGE_SHIFT); ppgtt->num_pt_pages = 1 << get_order(num_pt_pages << PAGE_SHIFT); ppgtt->num_pd_entries = max_pdp * GEN8_PDES_PER_PAGE; - ppgtt->enable = gen8_ppgtt_enable; - ppgtt->switch_mm = gen8_mm_switch; - ppgtt->base.clear_range = gen8_ppgtt_clear_range; - ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; - ppgtt->base.cleanup = gen8_ppgtt_cleanup; - ppgtt->base.start = 0; - ppgtt->base.total = ppgtt->num_pt_pages * GEN8_PTES_PER_PAGE * PAGE_SIZE; - BUG_ON(ppgtt->num_pd_pages > GEN8_LEGACY_PDPS); + for (i = 0; i < max_pdp; i++) { + ppgtt->gen8_pt_dma_addr[i] = kcalloc(GEN8_PDES_PER_PAGE, + sizeof(dma_addr_t), + GFP_KERNEL); + if (!ppgtt->gen8_pt_dma_addr[i]) { + ret = -ENOMEM; + goto bail; + } + } + /* - * - Create a mapping for the page directories. - * - For each page directory: - * allocate space for page table mappings. - * map each page table + * 2. Create all the DMA mappings for the page directories and page + * tables */ for (i = 0; i < max_pdp; i++) { - dma_addr_t temp; - temp = pci_map_page(ppgtt->base.dev->pdev, - &ppgtt->pd_pages[i], 0, - PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - if (pci_dma_mapping_error(ppgtt->base.dev->pdev, temp)) - goto err_out; + dma_addr_t pd_addr, pt_addr; - ppgtt->pd_dma_addr[i] = temp; + /* Get the page directory mappings */ + pd_addr = pci_map_page(hwdev, &ppgtt->pd_pages[i], 0, + PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pd_addr); + if (ret) + goto bail; - ppgtt->gen8_pt_dma_addr[i] = kmalloc(sizeof(dma_addr_t) * GEN8_PDES_PER_PAGE, GFP_KERNEL); - if (!ppgtt->gen8_pt_dma_addr[i]) - goto err_out; + ppgtt->pd_dma_addr[i] = pd_addr; + /* And the page table mappings per page directory */ for (j = 0; j < GEN8_PDES_PER_PAGE; j++) { struct page *p = &pt_pages[i * GEN8_PDES_PER_PAGE + j]; - temp = pci_map_page(ppgtt->base.dev->pdev, - p, 0, PAGE_SIZE, - PCI_DMA_BIDIRECTIONAL); - if (pci_dma_mapping_error(ppgtt->base.dev->pdev, temp)) - goto err_out; + pt_addr = pci_map_page(hwdev, p, 0, PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL); + ret = pci_dma_mapping_error(hwdev, pt_addr); + if (ret) + goto bail; - ppgtt->gen8_pt_dma_addr[i][j] = temp; + ppgtt->gen8_pt_dma_addr[i][j] = pt_addr; } } - /* For now, the PPGTT helper functions all require that the PDEs are + /* + * 3. Map all the page directory entires to point to the page tables + * we've allocated. + * + * For now, the PPGTT helper functions all require that the PDEs are * plugged in correctly. So we do that now/here. For aliasing PPGTT, we - * will never need to touch the PDEs again */ + * will never need to touch the PDEs again. + */ for (i = 0; i < max_pdp; i++) { gen8_ppgtt_pde_t *pd_vaddr; pd_vaddr = kmap_atomic(&ppgtt->pd_pages[i]); @@ -461,6 +462,14 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size) kunmap_atomic(pd_vaddr); } + ppgtt->enable = gen8_ppgtt_enable; + ppgtt->switch_mm = gen8_mm_switch; + ppgtt->base.clear_range = gen8_ppgtt_clear_range; + ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; + ppgtt->base.cleanup = gen8_ppgtt_cleanup; + ppgtt->base.start = 0; + ppgtt->base.total = ppgtt->num_pt_pages * GEN8_PTES_PER_PAGE * PAGE_SIZE; + ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE, true); @@ -473,8 +482,9 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size) size % (1<<30)); return 0; -err_out: - ppgtt->base.cleanup(&ppgtt->base); +bail: + gen8_ppgtt_unmap_pages(ppgtt); + gen8_ppgtt_free(ppgtt); return ret; } From bf2b4ed291a8a3681ba0cb50b6ba5423c47b9fac Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Wed, 19 Feb 2014 22:05:43 -0800 Subject: [PATCH 016/107] drm/i915/bdw: Split ppgtt initialization up Like cleanup in an earlier patch, the code becomes much more readable, and easier to extend if we extract out helper functions for the various stages of init. Note that with this patch it becomes really simple, and tempting to begin using the 'goto out' idiom with explicit free/fini semantics. I've kept the error path as similar as possible to the cleanup() function to make sure cleanup is as robust as possible v2: Remove comment "NB:From here on, ppgtt->base.cleanup() should function properly" Update commit message to reflect above v3: Rebased on top of bugfixes found in the previous patch by Imre Moved number of pd pages assertion to the proper place (Imre) v4: Allocate dma address space for num_pd_pages, not num_pd_entries (Ben) Don't use gen8_pt_dma_addr after free on error path (Imre) With new fix from v4 of the previous patch. Signed-off-by: Ben Widawsky Reviewed-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_gtt.c | 164 ++++++++++++++++++++-------- 1 file changed, 116 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 2bc07fb447bdb..beca57154d6f3 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -365,6 +365,113 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) gen8_ppgtt_free(ppgtt); } +static int gen8_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt, + const int max_pdp) +{ + struct page *pt_pages; + const int num_pt_pages = GEN8_PDES_PER_PAGE * max_pdp; + + pt_pages = alloc_pages(GFP_KERNEL, get_order(num_pt_pages << PAGE_SHIFT)); + if (!pt_pages) + return -ENOMEM; + + ppgtt->gen8_pt_pages = pt_pages; + ppgtt->num_pt_pages = 1 << get_order(num_pt_pages << PAGE_SHIFT); + + return 0; +} + +static int gen8_ppgtt_allocate_dma(struct i915_hw_ppgtt *ppgtt) +{ + int i; + + for (i = 0; i < ppgtt->num_pd_pages; i++) { + ppgtt->gen8_pt_dma_addr[i] = kcalloc(GEN8_PDES_PER_PAGE, + sizeof(dma_addr_t), + GFP_KERNEL); + if (!ppgtt->gen8_pt_dma_addr[i]) + return -ENOMEM; + } + + return 0; +} + +static int gen8_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt, + const int max_pdp) +{ + ppgtt->pd_pages = alloc_pages(GFP_KERNEL, get_order(max_pdp << PAGE_SHIFT)); + if (!ppgtt->pd_pages) + return -ENOMEM; + + ppgtt->num_pd_pages = 1 << get_order(max_pdp << PAGE_SHIFT); + BUG_ON(ppgtt->num_pd_pages > GEN8_LEGACY_PDPS); + + return 0; +} + +static int gen8_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt, + const int max_pdp) +{ + int ret; + + ret = gen8_ppgtt_allocate_page_directories(ppgtt, max_pdp); + if (ret) + return ret; + + ret = gen8_ppgtt_allocate_page_tables(ppgtt, max_pdp); + if (ret) { + __free_pages(ppgtt->pd_pages, get_order(max_pdp << PAGE_SHIFT)); + return ret; + } + + ppgtt->num_pd_entries = max_pdp * GEN8_PDES_PER_PAGE; + + ret = gen8_ppgtt_allocate_dma(ppgtt); + if (ret) + gen8_ppgtt_free(ppgtt); + + return ret; +} + +static int gen8_ppgtt_setup_page_directories(struct i915_hw_ppgtt *ppgtt, + const int pd) +{ + dma_addr_t pd_addr; + int ret; + + pd_addr = pci_map_page(ppgtt->base.dev->pdev, + &ppgtt->pd_pages[pd], 0, + PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + + ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pd_addr); + if (ret) + return ret; + + ppgtt->pd_dma_addr[pd] = pd_addr; + + return 0; +} + +static int gen8_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt, + const int pd, + const int pt) +{ + dma_addr_t pt_addr; + struct page *p; + int ret; + + p = &ppgtt->gen8_pt_pages[pd * GEN8_PDES_PER_PAGE + pt]; + pt_addr = pci_map_page(ppgtt->base.dev->pdev, + p, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pt_addr); + if (ret) + return ret; + + ppgtt->gen8_pt_dma_addr[pd][pt] = pt_addr; + + return 0; +} + /** * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers * with a net effect resembling a 2-level page table in normal x86 terms. Each @@ -377,69 +484,30 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) */ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size) { - struct page *pt_pages; const int max_pdp = DIV_ROUND_UP(size, 1 << 30); - const int num_pt_pages = GEN8_PDES_PER_PAGE * max_pdp; - struct pci_dev *hwdev = ppgtt->base.dev->pdev; + const int min_pt_pages = GEN8_PDES_PER_PAGE * max_pdp; int i, j, ret; if (size % (1<<30)) DRM_INFO("Pages will be wasted unless GTT size (%llu) is divisible by 1GB\n", size); - /* 1. Do all our allocations for page directories and page tables */ - ppgtt->pd_pages = alloc_pages(GFP_KERNEL, get_order(max_pdp << PAGE_SHIFT)); - if (!ppgtt->pd_pages) - return -ENOMEM; - - pt_pages = alloc_pages(GFP_KERNEL, get_order(num_pt_pages << PAGE_SHIFT)); - if (!pt_pages) { - __free_pages(ppgtt->pd_pages, get_order(max_pdp << PAGE_SHIFT)); - return -ENOMEM; - } - - ppgtt->gen8_pt_pages = pt_pages; - ppgtt->num_pd_pages = 1 << get_order(max_pdp << PAGE_SHIFT); - ppgtt->num_pt_pages = 1 << get_order(num_pt_pages << PAGE_SHIFT); - ppgtt->num_pd_entries = max_pdp * GEN8_PDES_PER_PAGE; - BUG_ON(ppgtt->num_pd_pages > GEN8_LEGACY_PDPS); - - for (i = 0; i < max_pdp; i++) { - ppgtt->gen8_pt_dma_addr[i] = kcalloc(GEN8_PDES_PER_PAGE, - sizeof(dma_addr_t), - GFP_KERNEL); - if (!ppgtt->gen8_pt_dma_addr[i]) { - ret = -ENOMEM; - goto bail; - } - } + /* 1. Do all our allocations for page directories and page tables. */ + ret = gen8_ppgtt_alloc(ppgtt, max_pdp); + if (ret) + return ret; /* - * 2. Create all the DMA mappings for the page directories and page - * tables + * 2. Create DMA mappings for the page directories and page tables. */ for (i = 0; i < max_pdp; i++) { - dma_addr_t pd_addr, pt_addr; - - /* Get the page directory mappings */ - pd_addr = pci_map_page(hwdev, &ppgtt->pd_pages[i], 0, - PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pd_addr); + ret = gen8_ppgtt_setup_page_directories(ppgtt, i); if (ret) goto bail; - ppgtt->pd_dma_addr[i] = pd_addr; - - /* And the page table mappings per page directory */ for (j = 0; j < GEN8_PDES_PER_PAGE; j++) { - struct page *p = &pt_pages[i * GEN8_PDES_PER_PAGE + j]; - - pt_addr = pci_map_page(hwdev, p, 0, PAGE_SIZE, - PCI_DMA_BIDIRECTIONAL); - ret = pci_dma_mapping_error(hwdev, pt_addr); + ret = gen8_ppgtt_setup_page_tables(ppgtt, i, j); if (ret) goto bail; - - ppgtt->gen8_pt_dma_addr[i][j] = pt_addr; } } @@ -478,7 +546,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size) ppgtt->num_pd_pages, ppgtt->num_pd_pages - max_pdp); DRM_DEBUG_DRIVER("Allocated %d pages for page tables (%lld wasted)\n", ppgtt->num_pt_pages, - (ppgtt->num_pt_pages - num_pt_pages) + + (ppgtt->num_pt_pages - min_pt_pages) + size % (1<<30)); return 0; From 782f149523d31a24a54e4abd2db6ce2b2106af8d Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Thu, 20 Feb 2014 11:50:33 -0800 Subject: [PATCH 017/107] drm/i915: Make clear/insert vfuncs args absolute This patch converts insert_entries and clear_range, both functions which are specific to the VM. These functions tend to encapsulate the gen specific PTE writes. Passing absolute addresses to the insert_entries, and clear_range will help make the logic clearer within the functions as to what's going on. Currently, all callers simply do the appropriate page shift, which IMO, ends up looking weird with an upcoming change for the gen8 page table allocations. Up until now, the PPGTT was a funky 2 level page table. GEN8 changes this to look more like a 3 level page table, and to that extent we need a significant amount more memory simply for the page tables. To address this, the allocations will be split up in finer amounts. v2: Replace size_t with uint64_t (Chris, Imre) v3: Fix size in gen8_ppgtt_init (Ben) Fix Size in i915_gem_suspend_gtt_mappings/restore (Imre) Reviewed-by: Imre Deak (v2) Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 6 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 90 ++++++++++++++++------------- 2 files changed, 54 insertions(+), 42 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f732433ea75e3..9512c0b1f63b7 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -652,12 +652,12 @@ struct i915_address_space { enum i915_cache_level level, bool valid); /* Create a valid PTE */ void (*clear_range)(struct i915_address_space *vm, - unsigned int first_entry, - unsigned int num_entries, + uint64_t start, + uint64_t length, bool use_scratch); void (*insert_entries)(struct i915_address_space *vm, struct sg_table *st, - unsigned int first_entry, + uint64_t start, enum i915_cache_level cache_level); void (*cleanup)(struct i915_address_space *vm); }; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index beca57154d6f3..03a387162b535 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -254,13 +254,15 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt, } static void gen8_ppgtt_clear_range(struct i915_address_space *vm, - unsigned first_entry, - unsigned num_entries, + uint64_t start, + uint64_t length, bool use_scratch) { struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base); gen8_gtt_pte_t *pt_vaddr, scratch_pte; + unsigned first_entry = start >> PAGE_SHIFT; + unsigned num_entries = length >> PAGE_SHIFT; unsigned act_pt = first_entry / GEN8_PTES_PER_PAGE; unsigned first_pte = first_entry % GEN8_PTES_PER_PAGE; unsigned last_pte, i; @@ -290,12 +292,13 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm, static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, struct sg_table *pages, - unsigned first_entry, + uint64_t start, enum i915_cache_level cache_level) { struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base); gen8_gtt_pte_t *pt_vaddr; + unsigned first_entry = start >> PAGE_SHIFT; unsigned act_pt = first_entry / GEN8_PTES_PER_PAGE; unsigned act_pte = first_entry % GEN8_PTES_PER_PAGE; struct sg_page_iter sg_iter; @@ -539,7 +542,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size) ppgtt->base.total = ppgtt->num_pt_pages * GEN8_PTES_PER_PAGE * PAGE_SIZE; ppgtt->base.clear_range(&ppgtt->base, 0, - ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE, + ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE * PAGE_SIZE, true); DRM_DEBUG_DRIVER("Allocated %d pages for page directories (%d wasted)\n", @@ -854,13 +857,15 @@ static int gen6_ppgtt_enable(struct i915_hw_ppgtt *ppgtt) /* PPGTT support for Sandybdrige/Gen6 and later */ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, - unsigned first_entry, - unsigned num_entries, + uint64_t start, + uint64_t length, bool use_scratch) { struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base); gen6_gtt_pte_t *pt_vaddr, scratch_pte; + unsigned first_entry = start >> PAGE_SHIFT; + unsigned num_entries = length >> PAGE_SHIFT; unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES; unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES; unsigned last_pte, i; @@ -887,12 +892,13 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, struct sg_table *pages, - unsigned first_entry, + uint64_t start, enum i915_cache_level cache_level) { struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base); gen6_gtt_pte_t *pt_vaddr; + unsigned first_entry = start >> PAGE_SHIFT; unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES; unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES; struct sg_page_iter sg_iter; @@ -1024,8 +1030,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->pt_dma_addr[i] = pt_addr; } - ppgtt->base.clear_range(&ppgtt->base, 0, - ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES, true); + ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true); ppgtt->debug_dump = gen6_dump_ppgtt; DRM_DEBUG_DRIVER("Allocated pde space (%ldM) at GTT entry: %lx\n", @@ -1089,20 +1094,17 @@ ppgtt_bind_vma(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags) { - const unsigned long entry = vma->node.start >> PAGE_SHIFT; - WARN_ON(flags); - vma->vm->insert_entries(vma->vm, vma->obj->pages, entry, cache_level); + vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start, + cache_level); } static void ppgtt_unbind_vma(struct i915_vma *vma) { - const unsigned long entry = vma->node.start >> PAGE_SHIFT; - vma->vm->clear_range(vma->vm, - entry, - vma->obj->base.size >> PAGE_SHIFT, + vma->node.start, + vma->obj->base.size, true); } @@ -1186,8 +1188,8 @@ void i915_gem_suspend_gtt_mappings(struct drm_device *dev) i915_check_and_clear_faults(dev); dev_priv->gtt.base.clear_range(&dev_priv->gtt.base, - dev_priv->gtt.base.start / PAGE_SIZE, - dev_priv->gtt.base.total / PAGE_SIZE, + dev_priv->gtt.base.start, + dev_priv->gtt.base.total, false); } @@ -1201,8 +1203,8 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) /* First fill our portion of the GTT with scratch pages */ dev_priv->gtt.base.clear_range(&dev_priv->gtt.base, - dev_priv->gtt.base.start / PAGE_SIZE, - dev_priv->gtt.base.total / PAGE_SIZE, + dev_priv->gtt.base.start, + dev_priv->gtt.base.total, true); list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { @@ -1263,10 +1265,11 @@ static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte) static void gen8_ggtt_insert_entries(struct i915_address_space *vm, struct sg_table *st, - unsigned int first_entry, + uint64_t start, enum i915_cache_level level) { struct drm_i915_private *dev_priv = vm->dev->dev_private; + unsigned first_entry = start >> PAGE_SHIFT; gen8_gtt_pte_t __iomem *gtt_entries = (gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry; int i = 0; @@ -1308,10 +1311,11 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, */ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, struct sg_table *st, - unsigned int first_entry, + uint64_t start, enum i915_cache_level level) { struct drm_i915_private *dev_priv = vm->dev->dev_private; + unsigned first_entry = start >> PAGE_SHIFT; gen6_gtt_pte_t __iomem *gtt_entries = (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry; int i = 0; @@ -1343,11 +1347,13 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, } static void gen8_ggtt_clear_range(struct i915_address_space *vm, - unsigned int first_entry, - unsigned int num_entries, + uint64_t start, + uint64_t length, bool use_scratch) { struct drm_i915_private *dev_priv = vm->dev->dev_private; + unsigned first_entry = start >> PAGE_SHIFT; + unsigned num_entries = length >> PAGE_SHIFT; gen8_gtt_pte_t scratch_pte, __iomem *gtt_base = (gen8_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry; const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry; @@ -1367,11 +1373,13 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm, } static void gen6_ggtt_clear_range(struct i915_address_space *vm, - unsigned int first_entry, - unsigned int num_entries, + uint64_t start, + uint64_t length, bool use_scratch) { struct drm_i915_private *dev_priv = vm->dev->dev_private; + unsigned first_entry = start >> PAGE_SHIFT; + unsigned num_entries = length >> PAGE_SHIFT; gen6_gtt_pte_t scratch_pte, __iomem *gtt_base = (gen6_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry; const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry; @@ -1404,10 +1412,12 @@ static void i915_ggtt_bind_vma(struct i915_vma *vma, } static void i915_ggtt_clear_range(struct i915_address_space *vm, - unsigned int first_entry, - unsigned int num_entries, + uint64_t start, + uint64_t length, bool unused) { + unsigned first_entry = start >> PAGE_SHIFT; + unsigned num_entries = length >> PAGE_SHIFT; intel_gtt_clear_range(first_entry, num_entries); } @@ -1428,7 +1438,6 @@ static void ggtt_bind_vma(struct i915_vma *vma, struct drm_device *dev = vma->vm->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj = vma->obj; - const unsigned long entry = vma->node.start >> PAGE_SHIFT; /* If there is no aliasing PPGTT, or the caller needs a global mapping, * or we have a global mapping already but the cacheability flags have @@ -1444,7 +1453,8 @@ static void ggtt_bind_vma(struct i915_vma *vma, if (!dev_priv->mm.aliasing_ppgtt || flags & GLOBAL_BIND) { if (!obj->has_global_gtt_mapping || (cache_level != obj->cache_level)) { - vma->vm->insert_entries(vma->vm, obj->pages, entry, + vma->vm->insert_entries(vma->vm, obj->pages, + vma->node.start, cache_level); obj->has_global_gtt_mapping = 1; } @@ -1455,7 +1465,9 @@ static void ggtt_bind_vma(struct i915_vma *vma, (cache_level != obj->cache_level))) { struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt; appgtt->base.insert_entries(&appgtt->base, - vma->obj->pages, entry, cache_level); + vma->obj->pages, + vma->node.start, + cache_level); vma->obj->has_aliasing_ppgtt_mapping = 1; } } @@ -1465,11 +1477,11 @@ static void ggtt_unbind_vma(struct i915_vma *vma) struct drm_device *dev = vma->vm->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj = vma->obj; - const unsigned long entry = vma->node.start >> PAGE_SHIFT; if (obj->has_global_gtt_mapping) { - vma->vm->clear_range(vma->vm, entry, - vma->obj->base.size >> PAGE_SHIFT, + vma->vm->clear_range(vma->vm, + vma->node.start, + obj->base.size, true); obj->has_global_gtt_mapping = 0; } @@ -1477,8 +1489,8 @@ static void ggtt_unbind_vma(struct i915_vma *vma) if (obj->has_aliasing_ppgtt_mapping) { struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt; appgtt->base.clear_range(&appgtt->base, - entry, - obj->base.size >> PAGE_SHIFT, + vma->node.start, + obj->base.size, true); obj->has_aliasing_ppgtt_mapping = 0; } @@ -1563,14 +1575,14 @@ void i915_gem_setup_global_gtt(struct drm_device *dev, /* Clear any non-preallocated blocks */ drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) { - const unsigned long count = (hole_end - hole_start) / PAGE_SIZE; DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", hole_start, hole_end); - ggtt_vm->clear_range(ggtt_vm, hole_start / PAGE_SIZE, count, true); + ggtt_vm->clear_range(ggtt_vm, hole_start, + hole_end - hole_start, true); } /* And finally clear the reserved guard page */ - ggtt_vm->clear_range(ggtt_vm, end / PAGE_SIZE - 1, 1, true); + ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true); } void i915_gem_init_global_gtt(struct drm_device *dev) From 7ad47cf252d432c8006589fa23de4ed61f1ac30f Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Thu, 20 Feb 2014 11:51:21 -0800 Subject: [PATCH 018/107] drm/i915/bdw: Reorganize PT allocations The previous allocation mechanism would get 2 contiguous allocations, one for the page directories, and one for the page tables. As each page table is 1 page, and there are 512 of these per page directory, this goes to 2MB. An unfriendly request at best. Worse still, our HW now supports 4 page directories, and a 2MB allocation is not allowed. In order to fix this, this patch attempts to split up each page table allocation into a single, discrete allocation. There is nothing really fancy about the patch itself, it just has to manage an extra pointer indirection, and have a fancier bit of logic to free up the pages. To accommodate some of the added complexity, two new helpers are introduced to allocate, and free the page table pages. NOTE: I really wanted to split the way we do allocations, and the way in which we identify the page table/page directory being used. I found splitting this functionality up to be too unwieldy. I apologize in advance to the reviewer. I'd recommend looking at the result, rather than the diff. v2/NOTE2: This patch predated commit: 6f1cc993518462ccf039e195fabd47e7aa5bfd13 Author: Chris Wilson Date: Tue Dec 31 15:50:31 2013 +0000 drm/i915: Avoid dereference past end of page arr It fixed the same issue as that patch, but because of the limbo state of PPGTT, Chris patch was merged instead. The excess churn is a result of my using my original patch, which has my preferred naming. Primarily act_* is changed to which_*, but it's mostly the same otherwise. I've kept the convention Chris used for the pte wrap (I had something slightly different, and broken - but fixable) v3: Rename which_p[..]e to drop which_ (Chris) Remove BUG_ON in inner loop (Chris) Redo the pde/pdpe wrap logic (Chris) v4: s/1MB/2MB in commit message (Imre) Plug leaking gen8_pt_pages in both the error path, as well as general free case (Imre) v5: Rename leftover "which_" variables (Imre) Add the pde = 0 wrap that was missed from v3 (Imre) Reviewed-by: Imre Deak Signed-off-by: Ben Widawsky [danvet: Squash in fixup from Ben.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 5 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 132 ++++++++++++++++++++++------ 2 files changed, 108 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9512c0b1f63b7..0a2002479f8b1 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -691,6 +691,7 @@ struct i915_gtt { }; #define gtt_total_entries(gtt) ((gtt).base.total >> PAGE_SHIFT) +#define GEN8_LEGACY_PDPS 4 struct i915_hw_ppgtt { struct i915_address_space base; struct kref ref; @@ -698,14 +699,14 @@ struct i915_hw_ppgtt { unsigned num_pd_entries; union { struct page **pt_pages; - struct page *gen8_pt_pages; + struct page **gen8_pt_pages[GEN8_LEGACY_PDPS]; }; struct page *pd_pages; int num_pd_pages; int num_pt_pages; union { uint32_t pd_offset; - dma_addr_t pd_dma_addr[4]; + dma_addr_t pd_dma_addr[GEN8_LEGACY_PDPS]; }; union { dma_addr_t *pt_dma_addr; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 03a387162b535..862ae371697a6 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -64,7 +64,19 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t; #define GEN8_PTES_PER_PAGE (PAGE_SIZE / sizeof(gen8_gtt_pte_t)) #define GEN8_PDES_PER_PAGE (PAGE_SIZE / sizeof(gen8_ppgtt_pde_t)) -#define GEN8_LEGACY_PDPS 4 + +/* GEN8 legacy style addressis defined as a 3 level page table: + * 31:30 | 29:21 | 20:12 | 11:0 + * PDPE | PDE | PTE | offset + * The difference as compared to normal x86 3 level page table is the PDPEs are + * programmed via register. + */ +#define GEN8_PDPE_SHIFT 30 +#define GEN8_PDPE_MASK 0x3 +#define GEN8_PDE_SHIFT 21 +#define GEN8_PDE_MASK 0x1ff +#define GEN8_PTE_SHIFT 12 +#define GEN8_PTE_MASK 0x1ff #define PPAT_UNCACHED_INDEX (_PAGE_PWT | _PAGE_PCD) #define PPAT_CACHED_PDE_INDEX 0 /* WB LLC */ @@ -261,32 +273,36 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm, struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base); gen8_gtt_pte_t *pt_vaddr, scratch_pte; - unsigned first_entry = start >> PAGE_SHIFT; + unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK; + unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK; + unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK; unsigned num_entries = length >> PAGE_SHIFT; - unsigned act_pt = first_entry / GEN8_PTES_PER_PAGE; - unsigned first_pte = first_entry % GEN8_PTES_PER_PAGE; unsigned last_pte, i; scratch_pte = gen8_pte_encode(ppgtt->base.scratch.addr, I915_CACHE_LLC, use_scratch); while (num_entries) { - struct page *page_table = &ppgtt->gen8_pt_pages[act_pt]; + struct page *page_table = ppgtt->gen8_pt_pages[pdpe][pde]; - last_pte = first_pte + num_entries; + last_pte = pte + num_entries; if (last_pte > GEN8_PTES_PER_PAGE) last_pte = GEN8_PTES_PER_PAGE; pt_vaddr = kmap_atomic(page_table); - for (i = first_pte; i < last_pte; i++) + for (i = pte; i < last_pte; i++) { pt_vaddr[i] = scratch_pte; + num_entries--; + } kunmap_atomic(pt_vaddr); - num_entries -= last_pte - first_pte; - first_pte = 0; - act_pt++; + pte = 0; + if (++pde == GEN8_PDES_PER_PAGE) { + pdpe++; + pde = 0; + } } } @@ -298,38 +314,59 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base); gen8_gtt_pte_t *pt_vaddr; - unsigned first_entry = start >> PAGE_SHIFT; - unsigned act_pt = first_entry / GEN8_PTES_PER_PAGE; - unsigned act_pte = first_entry % GEN8_PTES_PER_PAGE; + unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK; + unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK; + unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK; struct sg_page_iter sg_iter; pt_vaddr = NULL; + for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) { + if (WARN_ON(pdpe >= GEN8_LEGACY_PDPS)) + break; + if (pt_vaddr == NULL) - pt_vaddr = kmap_atomic(&ppgtt->gen8_pt_pages[act_pt]); + pt_vaddr = kmap_atomic(ppgtt->gen8_pt_pages[pdpe][pde]); - pt_vaddr[act_pte] = + pt_vaddr[pte] = gen8_pte_encode(sg_page_iter_dma_address(&sg_iter), cache_level, true); - if (++act_pte == GEN8_PTES_PER_PAGE) { + if (++pte == GEN8_PTES_PER_PAGE) { kunmap_atomic(pt_vaddr); pt_vaddr = NULL; - act_pt++; - act_pte = 0; + if (++pde == GEN8_PDES_PER_PAGE) { + pdpe++; + pde = 0; + } + pte = 0; } } if (pt_vaddr) kunmap_atomic(pt_vaddr); } -static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt) +static void gen8_free_page_tables(struct page **pt_pages) { int i; - for (i = 0; i < ppgtt->num_pd_pages ; i++) + if (pt_pages == NULL) + return; + + for (i = 0; i < GEN8_PDES_PER_PAGE; i++) + if (pt_pages[i]) + __free_pages(pt_pages[i], 0); +} + +static void gen8_ppgtt_free(const struct i915_hw_ppgtt *ppgtt) +{ + int i; + + for (i = 0; i < ppgtt->num_pd_pages; i++) { + gen8_free_page_tables(ppgtt->gen8_pt_pages[i]); + kfree(ppgtt->gen8_pt_pages[i]); kfree(ppgtt->gen8_pt_dma_addr[i]); + } - __free_pages(ppgtt->gen8_pt_pages, get_order(ppgtt->num_pt_pages << PAGE_SHIFT)); __free_pages(ppgtt->pd_pages, get_order(ppgtt->num_pd_pages << PAGE_SHIFT)); } @@ -368,20 +405,61 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) gen8_ppgtt_free(ppgtt); } +static struct page **__gen8_alloc_page_tables(void) +{ + struct page **pt_pages; + int i; + + pt_pages = kcalloc(GEN8_PDES_PER_PAGE, sizeof(struct page *), GFP_KERNEL); + if (!pt_pages) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < GEN8_PDES_PER_PAGE; i++) { + pt_pages[i] = alloc_page(GFP_KERNEL); + if (!pt_pages[i]) + goto bail; + } + + return pt_pages; + +bail: + gen8_free_page_tables(pt_pages); + kfree(pt_pages); + return ERR_PTR(-ENOMEM); +} + static int gen8_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt, const int max_pdp) { - struct page *pt_pages; + struct page **pt_pages[GEN8_LEGACY_PDPS]; const int num_pt_pages = GEN8_PDES_PER_PAGE * max_pdp; + int i, ret; - pt_pages = alloc_pages(GFP_KERNEL, get_order(num_pt_pages << PAGE_SHIFT)); - if (!pt_pages) - return -ENOMEM; + for (i = 0; i < max_pdp; i++) { + pt_pages[i] = __gen8_alloc_page_tables(); + if (IS_ERR(pt_pages[i])) { + ret = PTR_ERR(pt_pages[i]); + goto unwind_out; + } + } + + /* NB: Avoid touching gen8_pt_pages until last to keep the allocation, + * "atomic" - for cleanup purposes. + */ + for (i = 0; i < max_pdp; i++) + ppgtt->gen8_pt_pages[i] = pt_pages[i]; - ppgtt->gen8_pt_pages = pt_pages; ppgtt->num_pt_pages = 1 << get_order(num_pt_pages << PAGE_SHIFT); return 0; + +unwind_out: + while (i--) { + gen8_free_page_tables(pt_pages[i]); + kfree(pt_pages[i]); + } + + return ret; } static int gen8_ppgtt_allocate_dma(struct i915_hw_ppgtt *ppgtt) @@ -463,7 +541,7 @@ static int gen8_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt, struct page *p; int ret; - p = &ppgtt->gen8_pt_pages[pd * GEN8_PDES_PER_PAGE + pt]; + p = ppgtt->gen8_pt_pages[pd][pt]; pt_addr = pci_map_page(ppgtt->base.dev->pdev, p, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pt_addr); From 7907f45bf9f67a1c5e5d4ae05bab428d7c2f43b2 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Wed, 19 Feb 2014 22:05:46 -0800 Subject: [PATCH 019/107] Revert "drm/i915/bdw: Limit GTT to 2GB" This reverts commit 3a2ffb65eec6dbda2fd8151894f51c18b42c8d41. Now that the code is fixed to use smaller allocations, it should be safe to let the full GGTT be used on BDW. The testcase for this is anything which uses more than half of the GTT, thus eclipsing the old limit. Reviewed-by: Imre Deak Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_gtt.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 862ae371697a6..faa0319b8e465 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1725,11 +1725,6 @@ static inline unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl) bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK; if (bdw_gmch_ctl) bdw_gmch_ctl = 1 << bdw_gmch_ctl; - if (bdw_gmch_ctl > 4) { - WARN_ON(!i915.preliminary_hw_support); - return 4<<20; - } - return bdw_gmch_ctl << 20; } From c4ac524c15fdf3d1f221bfa69da184e6a797a927 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Wed, 19 Feb 2014 22:05:47 -0800 Subject: [PATCH 020/107] drm/i915: Update i915_gem_gtt.c copyright I keep meaning to do this... by now almost the entire file has been written by an Intel employee (including Daniel post-2010). Reviewed-by: Imre Deak Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_gtt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index faa0319b8e465..1b20cf7b323b3 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1,5 +1,6 @@ /* * Copyright © 2010 Daniel Vetter + * Copyright © 2011-2014 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), From a00d825de9284922a4977eaa7d513f88044ccc4a Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Wed, 19 Feb 2014 22:05:48 -0800 Subject: [PATCH 021/107] drm/i915: Split GEN6 PPGTT cleanup This cleanup is similar to the GEN8 cleanup (though less necessary). Having everything split will make cleaning the initialization path error paths easier to understand. Reviewed-by: Chris Wilson Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_gtt.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 1b20cf7b323b3..6b027f5cbd9a7 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1001,22 +1001,21 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, kunmap_atomic(pt_vaddr); } -static void gen6_ppgtt_cleanup(struct i915_address_space *vm) +static void gen6_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt) { - struct i915_hw_ppgtt *ppgtt = - container_of(vm, struct i915_hw_ppgtt, base); int i; - list_del(&vm->global_link); - drm_mm_takedown(&ppgtt->base.mm); - drm_mm_remove_node(&ppgtt->node); - if (ppgtt->pt_dma_addr) { for (i = 0; i < ppgtt->num_pd_entries; i++) pci_unmap_page(ppgtt->base.dev->pdev, ppgtt->pt_dma_addr[i], 4096, PCI_DMA_BIDIRECTIONAL); } +} + +static void gen6_ppgtt_free(struct i915_hw_ppgtt *ppgtt) +{ + int i; kfree(ppgtt->pt_dma_addr); for (i = 0; i < ppgtt->num_pd_entries; i++) @@ -1024,6 +1023,19 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm) kfree(ppgtt->pt_pages); } +static void gen6_ppgtt_cleanup(struct i915_address_space *vm) +{ + struct i915_hw_ppgtt *ppgtt = + container_of(vm, struct i915_hw_ppgtt, base); + + list_del(&vm->global_link); + drm_mm_takedown(&ppgtt->base.mm); + drm_mm_remove_node(&ppgtt->node); + + gen6_ppgtt_unmap_pages(ppgtt); + gen6_ppgtt_free(ppgtt); +} + static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) { #define GEN6_PD_ALIGN (PAGE_SIZE * 16) From b146520ff9130bdc6c32e4a282d61576605e64a2 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Wed, 19 Feb 2014 22:05:49 -0800 Subject: [PATCH 022/107] drm/i915: Split GEN6 PPGTT initialization up Simply to match the GEN8 style of PPGTT initialization, split up the allocations and mappings. Unlike GEN8, we skip a separate dma_addr_t allocation function, as it is much simpler pre-gen8. With this code it would be easy to make a more general PPGTT initialization function with per GEN alloc/map/etc. or use a common helper, similar to the ringbuffer code. I don't see a benefit to doing this just yet, but who knows... Reviewed-by: Chris Wilson Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_gtt.c | 141 ++++++++++++++++++---------- 1 file changed, 91 insertions(+), 50 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 6b027f5cbd9a7..f16c6ae2df637 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1036,14 +1036,14 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm) gen6_ppgtt_free(ppgtt); } -static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) +static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt) { #define GEN6_PD_ALIGN (PAGE_SIZE * 16) #define GEN6_PD_SIZE (GEN6_PPGTT_PD_ENTRIES * PAGE_SIZE) struct drm_device *dev = ppgtt->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; bool retried = false; - int i, ret; + int ret; /* PPGTT PDEs reside in the GGTT and consists of 512 entries. The * allocator works in address space sizes, so it's multiplied by page @@ -1070,42 +1070,60 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) if (ppgtt->node.start < dev_priv->gtt.mappable_end) DRM_DEBUG("Forced to use aperture for PDEs\n"); - ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode; ppgtt->num_pd_entries = GEN6_PPGTT_PD_ENTRIES; - if (IS_GEN6(dev)) { - ppgtt->enable = gen6_ppgtt_enable; - ppgtt->switch_mm = gen6_mm_switch; - } else if (IS_HASWELL(dev)) { - ppgtt->enable = gen7_ppgtt_enable; - ppgtt->switch_mm = hsw_mm_switch; - } else if (IS_GEN7(dev)) { - ppgtt->enable = gen7_ppgtt_enable; - ppgtt->switch_mm = gen7_mm_switch; - } else - BUG(); - ppgtt->base.clear_range = gen6_ppgtt_clear_range; - ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; - ppgtt->base.cleanup = gen6_ppgtt_cleanup; - ppgtt->base.scratch = dev_priv->gtt.base.scratch; - ppgtt->base.start = 0; - ppgtt->base.total = GEN6_PPGTT_PD_ENTRIES * I915_PPGTT_PT_ENTRIES * PAGE_SIZE; + return ret; +} + +static int gen6_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt) +{ + int i; + ppgtt->pt_pages = kcalloc(ppgtt->num_pd_entries, sizeof(struct page *), GFP_KERNEL); - if (!ppgtt->pt_pages) { - drm_mm_remove_node(&ppgtt->node); + + if (!ppgtt->pt_pages) return -ENOMEM; - } for (i = 0; i < ppgtt->num_pd_entries; i++) { ppgtt->pt_pages[i] = alloc_page(GFP_KERNEL); - if (!ppgtt->pt_pages[i]) - goto err_pt_alloc; + if (!ppgtt->pt_pages[i]) { + gen6_ppgtt_free(ppgtt); + return -ENOMEM; + } + } + + return 0; +} + +static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt) +{ + int ret; + + ret = gen6_ppgtt_allocate_page_directories(ppgtt); + if (ret) + return ret; + + ret = gen6_ppgtt_allocate_page_tables(ppgtt); + if (ret) { + drm_mm_remove_node(&ppgtt->node); + return ret; } ppgtt->pt_dma_addr = kcalloc(ppgtt->num_pd_entries, sizeof(dma_addr_t), GFP_KERNEL); - if (!ppgtt->pt_dma_addr) - goto err_pt_alloc; + if (!ppgtt->pt_dma_addr) { + drm_mm_remove_node(&ppgtt->node); + gen6_ppgtt_free(ppgtt); + return -ENOMEM; + } + + return 0; +} + +static int gen6_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt) +{ + struct drm_device *dev = ppgtt->base.dev; + int i; for (i = 0; i < ppgtt->num_pd_entries; i++) { dma_addr_t pt_addr; @@ -1114,40 +1132,63 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) PCI_DMA_BIDIRECTIONAL); if (pci_dma_mapping_error(dev->pdev, pt_addr)) { - ret = -EIO; - goto err_pd_pin; - + gen6_ppgtt_unmap_pages(ppgtt); + return -EIO; } + ppgtt->pt_dma_addr[i] = pt_addr; } - ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true); + return 0; +} + +static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) +{ + struct drm_device *dev = ppgtt->base.dev; + struct drm_i915_private *dev_priv = dev->dev_private; + int ret; + + ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode; + if (IS_GEN6(dev)) { + ppgtt->enable = gen6_ppgtt_enable; + ppgtt->switch_mm = gen6_mm_switch; + } else if (IS_HASWELL(dev)) { + ppgtt->enable = gen7_ppgtt_enable; + ppgtt->switch_mm = hsw_mm_switch; + } else if (IS_GEN7(dev)) { + ppgtt->enable = gen7_ppgtt_enable; + ppgtt->switch_mm = gen7_mm_switch; + } else + BUG(); + + ret = gen6_ppgtt_alloc(ppgtt); + if (ret) + return ret; + + ret = gen6_ppgtt_setup_page_tables(ppgtt); + if (ret) { + gen6_ppgtt_free(ppgtt); + return ret; + } + + ppgtt->base.clear_range = gen6_ppgtt_clear_range; + ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; + ppgtt->base.cleanup = gen6_ppgtt_cleanup; + ppgtt->base.scratch = dev_priv->gtt.base.scratch; + ppgtt->base.start = 0; + ppgtt->base.total = GEN6_PPGTT_PD_ENTRIES * I915_PPGTT_PT_ENTRIES * PAGE_SIZE; ppgtt->debug_dump = gen6_dump_ppgtt; - DRM_DEBUG_DRIVER("Allocated pde space (%ldM) at GTT entry: %lx\n", - ppgtt->node.size >> 20, - ppgtt->node.start / PAGE_SIZE); ppgtt->pd_offset = ppgtt->node.start / PAGE_SIZE * sizeof(gen6_gtt_pte_t); - return 0; + ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true); -err_pd_pin: - if (ppgtt->pt_dma_addr) { - for (i--; i >= 0; i--) - pci_unmap_page(dev->pdev, ppgtt->pt_dma_addr[i], - 4096, PCI_DMA_BIDIRECTIONAL); - } -err_pt_alloc: - kfree(ppgtt->pt_dma_addr); - for (i = 0; i < ppgtt->num_pd_entries; i++) { - if (ppgtt->pt_pages[i]) - __free_page(ppgtt->pt_pages[i]); - } - kfree(ppgtt->pt_pages); - drm_mm_remove_node(&ppgtt->node); + DRM_DEBUG_DRIVER("Allocated pde space (%ldM) at GTT entry: %lx\n", + ppgtt->node.size >> 20, + ppgtt->node.start / PAGE_SIZE); - return ret; + return 0; } int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) From 5abbcca30d69836df38527cb705c15bbe64712f8 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 21 Feb 2014 13:06:34 -0800 Subject: [PATCH 023/107] drm/i915/bdw: Kill ppgtt->num_pt_pages With the original PPGTT implementation if the number of PDPs was not a power of two, the number of pages for the page tables would end up being rounded up. The code actually had a bug here afaict, but this is a theoretical bug as I don't believe this can actually occur with the current code/HW.. With the rework of the page table allocations, there is no longer a distinction between number of page table pages, and number of page directory entries. To avoid confusion, kill the redundant (and newer) struct member. Cc: Imre Deak Signed-off-by: Ben Widawsky Reviewed-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 3 +-- drivers/gpu/drm/i915/i915_gem_gtt.c | 14 ++++---------- 3 files changed, 6 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 92dd2067fb08e..f3015034fd2d3 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1759,7 +1759,7 @@ static void gen8_ppgtt_info(struct seq_file *m, struct drm_device *dev) return; seq_printf(m, "Page directories: %d\n", ppgtt->num_pd_pages); - seq_printf(m, "Page tables: %d\n", ppgtt->num_pt_pages); + seq_printf(m, "Page tables: %d\n", ppgtt->num_pd_entries); for_each_ring(ring, dev_priv, unused) { seq_printf(m, "%s\n", ring->name); for (i = 0; i < 4; i++) { diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0a2002479f8b1..c942dbfbf003c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -697,13 +697,12 @@ struct i915_hw_ppgtt { struct kref ref; struct drm_mm_node node; unsigned num_pd_entries; + unsigned num_pd_pages; /* gen8+ */ union { struct page **pt_pages; struct page **gen8_pt_pages[GEN8_LEGACY_PDPS]; }; struct page *pd_pages; - int num_pd_pages; - int num_pt_pages; union { uint32_t pd_offset; dma_addr_t pd_dma_addr[GEN8_LEGACY_PDPS]; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index f16c6ae2df637..a616cac0f1610 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -433,7 +433,6 @@ static int gen8_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt, const int max_pdp) { struct page **pt_pages[GEN8_LEGACY_PDPS]; - const int num_pt_pages = GEN8_PDES_PER_PAGE * max_pdp; int i, ret; for (i = 0; i < max_pdp; i++) { @@ -450,8 +449,6 @@ static int gen8_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt, for (i = 0; i < max_pdp; i++) ppgtt->gen8_pt_pages[i] = pt_pages[i]; - ppgtt->num_pt_pages = 1 << get_order(num_pt_pages << PAGE_SHIFT); - return 0; unwind_out: @@ -618,18 +615,15 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size) ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; ppgtt->base.cleanup = gen8_ppgtt_cleanup; ppgtt->base.start = 0; - ppgtt->base.total = ppgtt->num_pt_pages * GEN8_PTES_PER_PAGE * PAGE_SIZE; + ppgtt->base.total = ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE * PAGE_SIZE; - ppgtt->base.clear_range(&ppgtt->base, 0, - ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE * PAGE_SIZE, - true); + ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true); DRM_DEBUG_DRIVER("Allocated %d pages for page directories (%d wasted)\n", ppgtt->num_pd_pages, ppgtt->num_pd_pages - max_pdp); DRM_DEBUG_DRIVER("Allocated %d pages for page tables (%lld wasted)\n", - ppgtt->num_pt_pages, - (ppgtt->num_pt_pages - min_pt_pages) + - size % (1<<30)); + ppgtt->num_pd_entries, + (ppgtt->num_pd_entries - min_pt_pages) + size % (1<<30)); return 0; bail: From 47e74f0fd12dca6981cbcbdd710899867115c692 Mon Sep 17 00:00:00 2001 From: Sinclair Yeh Date: Wed, 19 Feb 2014 13:09:31 -0800 Subject: [PATCH 024/107] drm/i915: Revert workaround for disabling L3 cache aging on BYT V2: edit the commit message to contain more info The W/A spreadsheet says this is still required, but the b-spec says it's not for BYT-T. So the documentation is not clear. However, our experience with the other SKUs of BYT-I/M on Android and Linux suggests that setting this bit actually causes GPU hang for certain OGL benchmark applications. Removing this bit completely resolves the GPU hangs. Signed-off-by: Sinclair Yeh Acked-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 3e754fec58b5d..d668866ba2eb6 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5043,9 +5043,6 @@ static void valleyview_init_clock_gating(struct drm_device *dev) _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP | GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE)); - /* WaDisableL3CacheAging:vlv */ - I915_WRITE(GEN7_L3CNTLREG1, I915_READ(GEN7_L3CNTLREG1) | GEN7_L3AGDIS); - /* WaForceL3Serialization:vlv */ I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) & ~L3SQ_URB_READ_CAM_MATCH_DISABLE); From 1af8452f1644acdef85a587c5e8264807f9b83a8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 14 Feb 2014 22:34:43 +0000 Subject: [PATCH 025/107] drm/i915: Revert workaround for disabling L3 cache aging on IVB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit e4e0c058a19c41150d12ad2d3023b3cf09c5de67 Author: Eugeni Dodonov Date: Wed Feb 8 12:53:50 2012 -0800 drm/i915: gen7: Implement an L3 caching workaround. the L3 cache aging was disabled. This was part of a shotgun response to a number of GPU hang bugs, but there appears to be no documentation to suggest that disabling the L3 cache age was ever required (to prevent the GPU hangs). Restoring the L3 cache age is a minor performance win of around 2% on IVB:GT2. (Note that this value seems to be consistent across a number of tests and so appears to be above the usual noise.) Signed-off-by: Chris Wilson Cc: Kenneth Graunke Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 5e832b1fd9d62..e3130352dfaa6 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4169,7 +4169,7 @@ #define VLV_B0_WA_L3SQCREG1_VALUE 0x00D30000 #define GEN7_L3CNTLREG1 0xB01C -#define GEN7_WA_FOR_GEN7_L3_CONTROL 0x3C4FFF8C +#define GEN7_WA_FOR_GEN7_L3_CONTROL 0x3C47FF8C #define GEN7_L3AGDIS (1<<19) #define GEN7_L3_CHICKEN_MODE_REGISTER 0xB030 From ef34ab894e54c97850296b5f8268004bf6788c74 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Thu, 20 Feb 2014 12:28:07 -0800 Subject: [PATCH 026/107] drm/i915: honor forced connector modes v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the move over to use BIOS connector configs, we lost the ability to force a specific set of connectors on or off. Try to remedy that by dropping back to the old behavior if we detect a hard coded connector config. v2: don't deref connector state for disabled connectors (Jesse) Reported-by: Ville Syrjälä Signed-off-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_fbdev.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 19be4bfbcc595..4e4b461e0a70c 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -291,6 +291,24 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, bool *save_enabled; bool any_enabled = false; + /* + * If the user specified any force options, just bail here + * and use that config. + */ + for (i = 0; i < fb_helper->connector_count; i++) { + struct drm_fb_helper_connector *fb_conn; + struct drm_connector *connector; + + fb_conn = fb_helper->connector_info[i]; + connector = fb_conn->connector; + + if (!enabled[i]) + continue; + + if (connector->force != DRM_FORCE_UNSPECIFIED) + return false; + } + save_enabled = kcalloc(dev->mode_config.num_connector, sizeof(bool), GFP_KERNEL); if (!save_enabled) From 8b687df4c305d3fd6b619dade83f95c4b860c74b Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Fri, 21 Feb 2014 13:13:39 -0800 Subject: [PATCH 027/107] drm/i915: re-add locking around hw state readout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To silence locking complaints. This was a rebase failure on my part in commit fa9fa083d0606cb323f6105c17702460ea0a6780 Author: Jesse Barnes Date: Tue Feb 11 15:28:56 2014 -0800 drm/i915: read out hw state earlier v2 Reported-by: Ville Syrjälä Signed-off-by: Jesse Barnes Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 8d316c8bdf8fa..924f3cee4badb 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11042,7 +11042,9 @@ void intel_modeset_init(struct drm_device *dev) /* Just in case the BIOS is doing something questionable. */ intel_disable_fbc(dev); + mutex_lock(&dev->mode_config.mutex); intel_modeset_setup_hw_state(dev, false); + mutex_unlock(&dev->mode_config.mutex); } static void From 227213438aff3050f2dd2be92dc48c8370d445a2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 4 Mar 2014 09:41:43 +0000 Subject: [PATCH 028/107] Revert "drm/i915: enable HiZ Raw Stall Optimization on IVB" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 116f2b6da868dec7539103574d0421cd6221e931. This optimization causes widespread corruption in games, and even in glxgears, on my ivb:gt1. The corruption appears like z-fighting of overlapping polygons in the HiZ buffer. The observation ties in very closely with the description of the optimization disabled by default on IVB: "The Hierarchical Z RAW Stall Optimization allows non-overlapping polygons in the same 8x4 pixel/sample area to be processed without stalling waiting for the earlier ones to write to Hierarchical Z buffer." No reason is given for why it is disabled by default, usually for such optimizations it is that it is incomplete. However, there is no indication whether this a gt1 only issue either. Before considering reenabling this optimization, I would first suggest reproducing the corruption in piglit. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=75623 Signed-off-by: Chris Wilson Cc: Chia-I Wu Cc: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index d668866ba2eb6..76d0bbcbeab0a 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4972,9 +4972,11 @@ static void ivybridge_init_clock_gating(struct drm_device *dev) gen7_setup_fixed_func_scheduler(dev_priv); - /* enable HiZ Raw Stall Optimization */ - I915_WRITE(CACHE_MODE_0_GEN7, - _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE)); + if (0) { /* causes HiZ corruption on ivb:gt1 */ + /* enable HiZ Raw Stall Optimization */ + I915_WRITE(CACHE_MODE_0_GEN7, + _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE)); + } /* WaDisable4x2SubspanOptimization:ivb */ I915_WRITE(CACHE_MODE_1, From 7d5e379989f46d80617709d4c45947136c50b2c6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 4 Mar 2014 13:15:08 +0000 Subject: [PATCH 029/107] drm/i915: Reject changes of fb base when we have a flip pending This should be impossible due to the wait for outstanding flips that the caller is meant to perform prior to updating the scanout base. Paranoia tells me to check anyway. References: https://bugs.freedesktop.org/show_bug.cgi?id=75502 Signed-off-by: Chris Wilson Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 43 ++++++++++++++++------------ 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 924f3cee4badb..7f92b039191b1 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2321,6 +2321,25 @@ intel_finish_fb(struct drm_framebuffer *old_fb) return ret; } +static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc) +{ + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + unsigned long flags; + bool pending; + + if (i915_reset_in_progress(&dev_priv->gpu_error) || + intel_crtc->reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) + return false; + + spin_lock_irqsave(&dev->event_lock, flags); + pending = to_intel_crtc(crtc)->unpin_work != NULL; + spin_unlock_irqrestore(&dev->event_lock, flags); + + return pending; +} + static int intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, struct drm_framebuffer *fb) @@ -2331,6 +2350,11 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, struct drm_framebuffer *old_fb; int ret; + if (intel_crtc_has_pending_flip(crtc)) { + DRM_ERROR("pipe is still busy with an old pageflip\n"); + return -EBUSY; + } + /* no fb bound */ if (!fb) { DRM_ERROR("No FB bound\n"); @@ -2956,25 +2980,6 @@ static void ironlake_fdi_disable(struct drm_crtc *crtc) udelay(100); } -static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc) -{ - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - unsigned long flags; - bool pending; - - if (i915_reset_in_progress(&dev_priv->gpu_error) || - intel_crtc->reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) - return false; - - spin_lock_irqsave(&dev->event_lock, flags); - pending = to_intel_crtc(crtc)->unpin_work != NULL; - spin_unlock_irqrestore(&dev->event_lock, flags); - - return pending; -} - bool intel_has_pending_fb_unpin(struct drm_device *dev) { struct intel_crtc *crtc; From 7c2bb53110f2c9a9c1c6a1a2699d8611c9292745 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 4 Mar 2014 21:08:41 +0100 Subject: [PATCH 030/107] drm/i915: s/any_enabled/!fallback/ in fbdev_initial_config It started as a simple check whether anything is lit up, but now is't used to driver the general fallback logic to the default output configuration selector in the helper library. So rename it for more clarity. Cc: Jesse Barnes Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_fbdev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 4e4b461e0a70c..df00e6b01f0da 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -289,7 +289,7 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, struct drm_device *dev = fb_helper->dev; int i, j; bool *save_enabled; - bool any_enabled = false; + bool fallback = true; /* * If the user specified any force options, just bail here @@ -347,7 +347,7 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, */ for (j = 0; j < fb_helper->connector_count; j++) { if (crtcs[j] == new_crtc) { - any_enabled = false; + fallback = true; goto out; } } @@ -390,11 +390,11 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, encoder->crtc->base.id, modes[i]->name); - any_enabled = true; + fallback = false; } out: - if (!any_enabled) { + if (fallback) { memcpy(enabled, save_enabled, dev->mode_config.num_connector); kfree(save_enabled); return false; From 7e696e4cadcbeb5e7f794fe77f02a20e857feeb1 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 4 Mar 2014 21:08:42 +0100 Subject: [PATCH 031/107] drm/i915: ignore bios output config if not all outputs are on MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both Ville and QA rather immediately complained that with the new initial_config logic from Jesse not all outputs get enabled. Since the fbdev emulation pretty much tries to always enable as many outputs as possible (it even has hotplug handling and all that) fall back if more outputs could have been enabled. v2: Fix up my confusion about what enabled means - it's passed from the fbdev helper, we need to check for a non-zero connector->encoder link. Spotted by Ville. v3: Add some debug output as requested by Jesse for debugging fallback issues. Cc: Jesse Barnes Cc: Ville Syrjälä Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=75552 Tested-by: Ville Syrjälä Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_fbdev.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index df00e6b01f0da..6b5beed28d700 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -290,6 +290,8 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, int i, j; bool *save_enabled; bool fallback = true; + int num_connectors_enabled = 0; + int num_connectors_detected = 0; /* * If the user specified any force options, just bail here @@ -324,6 +326,10 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, fb_conn = fb_helper->connector_info[i]; connector = fb_conn->connector; + + if (connector->status == connector_status_connected) + num_connectors_detected++; + if (!enabled[i]) { DRM_DEBUG_KMS("connector %d not enabled, skipping\n", connector->base.id); @@ -338,6 +344,8 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, continue; } + num_connectors_enabled++; + new_crtc = intel_fb_helper_crtc(fb_helper, encoder->crtc); /* @@ -347,6 +355,7 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, */ for (j = 0; j < fb_helper->connector_count; j++) { if (crtcs[j] == new_crtc) { + DRM_DEBUG_KMS("fallback: cloned configuration\n"); fallback = true; goto out; } @@ -393,8 +402,22 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, fallback = false; } + /* + * If the BIOS didn't enable everything it could, fall back to have the + * same user experiencing of lighting up as much as possible like the + * fbdev helper library. + */ + if (num_connectors_enabled != num_connectors_detected && + num_connectors_enabled < INTEL_INFO(dev)->num_pipes) { + DRM_DEBUG_KMS("fallback: Not all outputs enabled\n"); + DRM_DEBUG_KMS("Enabled: %i, detected: %i\n", num_connectors_enabled, + num_connectors_detected); + fallback = true; + } + out: if (fallback) { + DRM_DEBUG_KMS("Not using firmware configuration\n"); memcpy(enabled, save_enabled, dev->mode_config.num_connector); kfree(save_enabled); return false; From da7e29bd5b6dfab9b64eeffa7816fdcf00048d14 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 18 Feb 2014 00:02:02 +0200 Subject: [PATCH 032/107] drm/i915: use drm_i915_private everywhere in the power domain api The power domains framework is internal to the i915 driver, so pass drm_i915_private instead of drm_device to its functions. Also remove a dangling intel_set_power_well() declaration. No functional change. Signed-off-by: Imre Deak Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 14 +++--- drivers/gpu/drm/i915/i915_drv.c | 4 +- drivers/gpu/drm/i915/i915_drv.h | 4 +- drivers/gpu/drm/i915/intel_display.c | 27 ++++++------ drivers/gpu/drm/i915/intel_drv.h | 17 ++++---- drivers/gpu/drm/i915/intel_pm.c | 65 +++++++++++----------------- 6 files changed, 58 insertions(+), 73 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 7688abc83fc09..8177c172a123c 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1325,7 +1325,7 @@ static int i915_load_modeset_init(struct drm_device *dev) if (ret) goto cleanup_gem_stolen; - intel_power_domains_init_hw(dev); + intel_power_domains_init_hw(dev_priv); /* Important: The output setup functions called by modeset_init need * working irqs for e.g. gmbus and dp aux transfers. */ @@ -1343,7 +1343,7 @@ static int i915_load_modeset_init(struct drm_device *dev) /* FIXME: do pre/post-mode set stuff in core KMS code */ dev->vblank_disable_allowed = true; if (INTEL_INFO(dev)->num_pipes == 0) { - intel_display_power_put(dev, POWER_DOMAIN_VGA); + intel_display_power_put(dev_priv, POWER_DOMAIN_VGA); return 0; } @@ -1381,7 +1381,7 @@ static int i915_load_modeset_init(struct drm_device *dev) WARN_ON(dev_priv->mm.aliasing_ppgtt); drm_mm_takedown(&dev_priv->gtt.base.mm); cleanup_power: - intel_display_power_put(dev, POWER_DOMAIN_VGA); + intel_display_power_put(dev_priv, POWER_DOMAIN_VGA); drm_irq_uninstall(dev); cleanup_gem_stolen: i915_gem_cleanup_stolen(dev); @@ -1702,7 +1702,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) goto out_gem_unload; } - intel_power_domains_init(dev); + intel_power_domains_init(dev_priv); if (drm_core_check_feature(dev, DRIVER_MODESET)) { ret = i915_load_modeset_init(dev); @@ -1731,7 +1731,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) return 0; out_power_well: - intel_power_domains_remove(dev); + intel_power_domains_remove(dev_priv); drm_vblank_cleanup(dev); out_gem_unload: if (dev_priv->mm.inactive_shrinker.scan_objects) @@ -1781,8 +1781,8 @@ int i915_driver_unload(struct drm_device *dev) /* The i915.ko module is still not prepared to be loaded when * the power well is not enabled, so just enable it in case * we're going to unload/reload. */ - intel_display_set_init_power(dev, true); - intel_power_domains_remove(dev); + intel_display_set_init_power(dev_priv, true); + intel_power_domains_remove(dev_priv); i915_teardown_sysfs(dev); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 2d05d7ce4c297..c4abe877fe335 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -434,7 +434,7 @@ static int i915_drm_freeze(struct drm_device *dev) /* We do a lot of poking in a lot of registers, make sure they work * properly. */ hsw_disable_package_c8(dev_priv); - intel_display_set_init_power(dev, true); + intel_display_set_init_power(dev_priv, true); drm_kms_helper_poll_disable(dev); @@ -556,7 +556,7 @@ static int __i915_drm_thaw(struct drm_device *dev, bool restore_gtt_mappings) mutex_unlock(&dev->struct_mutex); } - intel_power_domains_init_hw(dev); + intel_power_domains_init_hw(dev_priv); i915_restore_state(dev); intel_opregion_setup(dev); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c942dbfbf003c..68100415e7a3d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1024,9 +1024,9 @@ struct i915_power_well { int count; unsigned long domains; void *data; - void (*set)(struct drm_device *dev, struct i915_power_well *power_well, + void (*set)(struct drm_i915_private *dev_priv, struct i915_power_well *power_well, bool enable); - bool (*is_enabled)(struct drm_device *dev, + bool (*is_enabled)(struct drm_i915_private *dev_priv, struct i915_power_well *power_well); }; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 7f92b039191b1..0a2a9868dbae0 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1122,7 +1122,7 @@ void assert_pipe(struct drm_i915_private *dev_priv, if (pipe == PIPE_A && dev_priv->quirks & QUIRK_PIPEA_FORCE) state = true; - if (!intel_display_power_enabled(dev_priv->dev, + if (!intel_display_power_enabled(dev_priv, POWER_DOMAIN_TRANSCODER(cpu_transcoder))) { cur_state = false; } else { @@ -6863,23 +6863,23 @@ static unsigned long get_pipe_power_domains(struct drm_device *dev, return mask; } -void intel_display_set_init_power(struct drm_device *dev, bool enable) +void intel_display_set_init_power(struct drm_i915_private *dev_priv, + bool enable) { - struct drm_i915_private *dev_priv = dev->dev_private; - if (dev_priv->power_domains.init_power_on == enable) return; if (enable) - intel_display_power_get(dev, POWER_DOMAIN_INIT); + intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); else - intel_display_power_put(dev, POWER_DOMAIN_INIT); + intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); dev_priv->power_domains.init_power_on = enable; } static void modeset_update_crtc_power_domains(struct drm_device *dev) { + struct drm_i915_private *dev_priv = dev->dev_private; unsigned long pipe_domains[I915_MAX_PIPES] = { 0, }; struct intel_crtc *crtc; @@ -6898,19 +6898,19 @@ static void modeset_update_crtc_power_domains(struct drm_device *dev) crtc->config.pch_pfit.enabled); for_each_power_domain(domain, pipe_domains[crtc->pipe]) - intel_display_power_get(dev, domain); + intel_display_power_get(dev_priv, domain); } list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) { enum intel_display_power_domain domain; for_each_power_domain(domain, crtc->enabled_power_domains) - intel_display_power_put(dev, domain); + intel_display_power_put(dev_priv, domain); crtc->enabled_power_domains = pipe_domains[crtc->pipe]; } - intel_display_set_init_power(dev, false); + intel_display_set_init_power(dev_priv, false); } static void haswell_modeset_global_resources(struct drm_device *dev) @@ -6991,7 +6991,7 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc, pipe_config->cpu_transcoder = TRANSCODER_EDP; } - if (!intel_display_power_enabled(dev, + if (!intel_display_power_enabled(dev_priv, POWER_DOMAIN_TRANSCODER(pipe_config->cpu_transcoder))) return false; @@ -7019,7 +7019,7 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc, intel_get_pipe_timings(crtc, pipe_config); pfit_domain = POWER_DOMAIN_PIPE_PANEL_FITTER(crtc->pipe); - if (intel_display_power_enabled(dev, pfit_domain)) + if (intel_display_power_enabled(dev_priv, pfit_domain)) ironlake_get_pfit_config(crtc, pipe_config); if (IS_HASWELL(dev)) @@ -11595,7 +11595,8 @@ intel_display_capture_error_state(struct drm_device *dev) for_each_pipe(i) { error->pipe[i].power_domain_on = - intel_display_power_enabled_sw(dev, POWER_DOMAIN_PIPE(i)); + intel_display_power_enabled_sw(dev_priv, + POWER_DOMAIN_PIPE(i)); if (!error->pipe[i].power_domain_on) continue; @@ -11633,7 +11634,7 @@ intel_display_capture_error_state(struct drm_device *dev) enum transcoder cpu_transcoder = transcoders[i]; error->transcoder[i].power_domain_on = - intel_display_power_enabled_sw(dev, + intel_display_power_enabled_sw(dev_priv, POWER_DOMAIN_TRANSCODER(cpu_transcoder)); if (!error->transcoder[i].power_domain_on) continue; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index a4ffc021c317c..60427970caedb 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -732,7 +732,7 @@ ironlake_check_encoder_dotclock(const struct intel_crtc_config *pipe_config, bool intel_crtc_active(struct drm_crtc *crtc); void hsw_enable_ips(struct intel_crtc *crtc); void hsw_disable_ips(struct intel_crtc *crtc); -void intel_display_set_init_power(struct drm_device *dev, bool enable); +void intel_display_set_init_power(struct drm_i915_private *dev, bool enable); int valleyview_get_vco(struct drm_i915_private *dev_priv); void intel_mode_from_pipe_config(struct drm_display_mode *mode, struct intel_crtc_config *pipe_config); @@ -871,18 +871,17 @@ bool intel_fbc_enabled(struct drm_device *dev); void intel_update_fbc(struct drm_device *dev); void intel_gpu_ips_init(struct drm_i915_private *dev_priv); void intel_gpu_ips_teardown(void); -int intel_power_domains_init(struct drm_device *dev); -void intel_power_domains_remove(struct drm_device *dev); -bool intel_display_power_enabled(struct drm_device *dev, +int intel_power_domains_init(struct drm_i915_private *); +void intel_power_domains_remove(struct drm_i915_private *); +bool intel_display_power_enabled(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain); -bool intel_display_power_enabled_sw(struct drm_device *dev, +bool intel_display_power_enabled_sw(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain); -void intel_display_power_get(struct drm_device *dev, +void intel_display_power_get(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain); -void intel_display_power_put(struct drm_device *dev, +void intel_display_power_put(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain); -void intel_power_domains_init_hw(struct drm_device *dev); -void intel_set_power_well(struct drm_device *dev, bool enable); +void intel_power_domains_init_hw(struct drm_i915_private *dev_priv); void intel_enable_gt_powersave(struct drm_device *dev); void intel_disable_gt_powersave(struct drm_device *dev); void ironlake_teardown_rc6(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 76d0bbcbeab0a..87ab6544dc48f 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5205,19 +5205,16 @@ void intel_suspend_hw(struct drm_device *dev) * enable it, so check if it's enabled and also check if we've requested it to * be enabled. */ -static bool hsw_power_well_enabled(struct drm_device *dev, +static bool hsw_power_well_enabled(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - struct drm_i915_private *dev_priv = dev->dev_private; - return I915_READ(HSW_PWR_WELL_DRIVER) == (HSW_PWR_WELL_ENABLE_REQUEST | HSW_PWR_WELL_STATE_ENABLED); } -bool intel_display_power_enabled_sw(struct drm_device *dev, +bool intel_display_power_enabled_sw(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain) { - struct drm_i915_private *dev_priv = dev->dev_private; struct i915_power_domains *power_domains; power_domains = &dev_priv->power_domains; @@ -5225,10 +5222,9 @@ bool intel_display_power_enabled_sw(struct drm_device *dev, return power_domains->domain_use_count[domain]; } -bool intel_display_power_enabled(struct drm_device *dev, +bool intel_display_power_enabled(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain) { - struct drm_i915_private *dev_priv = dev->dev_private; struct i915_power_domains *power_domains; struct i915_power_well *power_well; bool is_enabled; @@ -5243,7 +5239,7 @@ bool intel_display_power_enabled(struct drm_device *dev, if (power_well->always_on) continue; - if (!power_well->is_enabled(dev, power_well)) { + if (!power_well->is_enabled(dev_priv, power_well)) { is_enabled = false; break; } @@ -5309,10 +5305,9 @@ static void hsw_power_well_post_disable(struct drm_i915_private *dev_priv) spin_unlock_irqrestore(&dev->vbl_lock, irqflags); } -static void hsw_set_power_well(struct drm_device *dev, +static void hsw_set_power_well(struct drm_i915_private *dev_priv, struct i915_power_well *power_well, bool enable) { - struct drm_i915_private *dev_priv = dev->dev_private; bool is_enabled, enable_requested; uint32_t tmp; @@ -5346,35 +5341,30 @@ static void hsw_set_power_well(struct drm_device *dev, } } -static void __intel_power_well_get(struct drm_device *dev, +static void __intel_power_well_get(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - struct drm_i915_private *dev_priv = dev->dev_private; - if (!power_well->count++ && power_well->set) { hsw_disable_package_c8(dev_priv); - power_well->set(dev, power_well, true); + power_well->set(dev_priv, power_well, true); } } -static void __intel_power_well_put(struct drm_device *dev, +static void __intel_power_well_put(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - struct drm_i915_private *dev_priv = dev->dev_private; - WARN_ON(!power_well->count); if (!--power_well->count && power_well->set && i915.disable_power_well) { - power_well->set(dev, power_well, false); + power_well->set(dev_priv, power_well, false); hsw_enable_package_c8(dev_priv); } } -void intel_display_power_get(struct drm_device *dev, +void intel_display_power_get(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain) { - struct drm_i915_private *dev_priv = dev->dev_private; struct i915_power_domains *power_domains; struct i915_power_well *power_well; int i; @@ -5384,17 +5374,16 @@ void intel_display_power_get(struct drm_device *dev, mutex_lock(&power_domains->lock); for_each_power_well(i, power_well, BIT(domain), power_domains) - __intel_power_well_get(dev, power_well); + __intel_power_well_get(dev_priv, power_well); power_domains->domain_use_count[domain]++; mutex_unlock(&power_domains->lock); } -void intel_display_power_put(struct drm_device *dev, +void intel_display_power_put(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain) { - struct drm_i915_private *dev_priv = dev->dev_private; struct i915_power_domains *power_domains; struct i915_power_well *power_well; int i; @@ -5407,7 +5396,7 @@ void intel_display_power_put(struct drm_device *dev, power_domains->domain_use_count[domain]--; for_each_power_well_rev(i, power_well, BIT(domain), power_domains) - __intel_power_well_put(dev, power_well); + __intel_power_well_put(dev_priv, power_well); mutex_unlock(&power_domains->lock); } @@ -5424,7 +5413,7 @@ void i915_request_power_well(void) dev_priv = container_of(hsw_pwr, struct drm_i915_private, power_domains); - intel_display_power_get(dev_priv->dev, POWER_DOMAIN_AUDIO); + intel_display_power_get(dev_priv, POWER_DOMAIN_AUDIO); } EXPORT_SYMBOL_GPL(i915_request_power_well); @@ -5438,7 +5427,7 @@ void i915_release_power_well(void) dev_priv = container_of(hsw_pwr, struct drm_i915_private, power_domains); - intel_display_power_put(dev_priv->dev, POWER_DOMAIN_AUDIO); + intel_display_power_put(dev_priv, POWER_DOMAIN_AUDIO); } EXPORT_SYMBOL_GPL(i915_release_power_well); @@ -5483,9 +5472,8 @@ static struct i915_power_well bdw_power_wells[] = { (power_domains)->power_well_count = ARRAY_SIZE(__power_wells); \ }) -int intel_power_domains_init(struct drm_device *dev) +int intel_power_domains_init(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; struct i915_power_domains *power_domains = &dev_priv->power_domains; mutex_init(&power_domains->lock); @@ -5494,10 +5482,10 @@ int intel_power_domains_init(struct drm_device *dev) * The enabling order will be from lower to higher indexed wells, * the disabling order is reversed. */ - if (IS_HASWELL(dev)) { + if (IS_HASWELL(dev_priv->dev)) { set_power_wells(power_domains, hsw_power_wells); hsw_pwr = power_domains; - } else if (IS_BROADWELL(dev)) { + } else if (IS_BROADWELL(dev_priv->dev)) { set_power_wells(power_domains, bdw_power_wells); hsw_pwr = power_domains; } else { @@ -5507,14 +5495,13 @@ int intel_power_domains_init(struct drm_device *dev) return 0; } -void intel_power_domains_remove(struct drm_device *dev) +void intel_power_domains_remove(struct drm_i915_private *dev_priv) { hsw_pwr = NULL; } -static void intel_power_domains_resume(struct drm_device *dev) +static void intel_power_domains_resume(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; struct i915_power_domains *power_domains = &dev_priv->power_domains; struct i915_power_well *power_well; int i; @@ -5522,7 +5509,7 @@ static void intel_power_domains_resume(struct drm_device *dev) mutex_lock(&power_domains->lock); for_each_power_well(i, power_well, POWER_DOMAIN_MASK, power_domains) { if (power_well->set) - power_well->set(dev, power_well, power_well->count > 0); + power_well->set(dev_priv, power_well, power_well->count > 0); } mutex_unlock(&power_domains->lock); } @@ -5533,15 +5520,13 @@ static void intel_power_domains_resume(struct drm_device *dev) * to be enabled, and it will only be disabled if none of the registers is * requesting it to be enabled. */ -void intel_power_domains_init_hw(struct drm_device *dev) +void intel_power_domains_init_hw(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; - /* For now, we need the power well to be always enabled. */ - intel_display_set_init_power(dev, true); - intel_power_domains_resume(dev); + intel_display_set_init_power(dev_priv, true); + intel_power_domains_resume(dev_priv); - if (!(IS_HASWELL(dev) || IS_BROADWELL(dev))) + if (!(IS_HASWELL(dev_priv->dev) || IS_BROADWELL(dev_priv->dev))) return; /* We're taking over the BIOS, so clear any requests made by it since From e13192f6c1e4aa064adab3865af321392a303c87 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 18 Feb 2014 00:02:15 +0200 Subject: [PATCH 033/107] drm/i915: switch order of power domain init wrt. irq install On VLV at least the display IRQ register access and functionality depends on its power well to be on, so move the power domain HW init before we install the IRQs. Signed-off-by: Imre Deak Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 8177c172a123c..f8f7a59ab0761 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1321,12 +1321,12 @@ static int i915_load_modeset_init(struct drm_device *dev) if (ret) goto cleanup_vga_switcheroo; + intel_power_domains_init_hw(dev_priv); + ret = drm_irq_install(dev); if (ret) goto cleanup_gem_stolen; - intel_power_domains_init_hw(dev_priv); - /* Important: The output setup functions called by modeset_init need * working irqs for e.g. gmbus and dp aux transfers. */ intel_modeset_init(dev); From 040987539448708c36ece11c985ee9f3166bb420 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 18 Feb 2014 00:02:16 +0200 Subject: [PATCH 034/107] drm/i915: use power domain api to check vga power state This way we can reuse the check on other platforms too. Also factor out a version of the function that doesn't check if the power is on, we'll need to call this from within the power domain framework. Signed-off-by: Imre Deak Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_display.c | 20 +++++++++++++------- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 68100415e7a3d..944c6174cd423 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2520,6 +2520,7 @@ extern int intel_modeset_vga_set_state(struct drm_device *dev, bool state); extern void intel_modeset_setup_hw_state(struct drm_device *dev, bool force_restore); extern void i915_redisable_vga(struct drm_device *dev); +extern void i915_redisable_vga_power_on(struct drm_device *dev); extern bool intel_fbc_enabled(struct drm_device *dev); extern void intel_disable_fbc(struct drm_device *dev); extern bool ironlake_set_drps(struct drm_device *dev, u8 val); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 0a2a9868dbae0..842b07ed742a7 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11232,11 +11232,21 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder) * the crtc fixup. */ } -void i915_redisable_vga(struct drm_device *dev) +void i915_redisable_vga_power_on(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; u32 vga_reg = i915_vgacntrl_reg(dev); + if (!(I915_READ(vga_reg) & VGA_DISP_DISABLE)) { + DRM_DEBUG_KMS("Something enabled VGA plane, disabling it\n"); + i915_disable_vga(dev); + } +} + +void i915_redisable_vga(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + /* This function can be called both from intel_modeset_setup_hw_state or * at a very early point in our resume sequence, where the power well * structures are not yet restored. Since this function is at a very @@ -11244,14 +11254,10 @@ void i915_redisable_vga(struct drm_device *dev) * level, just check if the power well is enabled instead of trying to * follow the "don't touch the power well if we don't need it" policy * the rest of the driver uses. */ - if ((IS_HASWELL(dev) || IS_BROADWELL(dev)) && - (I915_READ(HSW_PWR_WELL_DRIVER) & HSW_PWR_WELL_STATE_ENABLED) == 0) + if (!intel_display_power_enabled(dev_priv, POWER_DOMAIN_VGA)) return; - if (!(I915_READ(vga_reg) & VGA_DISP_DISABLE)) { - DRM_DEBUG_KMS("Something enabled VGA plane, disabling it\n"); - i915_disable_vga(dev); - } + i915_redisable_vga_power_on(dev); } static void intel_modeset_readout_hw_state(struct drm_device *dev) From 93c73e8c6eae891f73e39fb4bad19f996d3a8c1f Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 18 Feb 2014 00:02:19 +0200 Subject: [PATCH 035/107] drm/i915: move hsw power domain comment to its right place Signed-off-by: Imre Deak Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 87ab6544dc48f..146cf83a93855 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5249,6 +5249,12 @@ bool intel_display_power_enabled(struct drm_i915_private *dev_priv, return is_enabled; } +/* + * Starting with Haswell, we have a "Power Down Well" that can be turned off + * when not needed anymore. We have 4 registers that can request the power well + * to be enabled, and it will only be disabled if none of the registers is + * requesting it to be enabled. + */ static void hsw_power_well_post_enable(struct drm_i915_private *dev_priv) { struct drm_device *dev = dev_priv->dev; @@ -5514,12 +5520,6 @@ static void intel_power_domains_resume(struct drm_i915_private *dev_priv) mutex_unlock(&power_domains->lock); } -/* - * Starting with Haswell, we have a "Power Down Well" that can be turned off - * when not needed anymore. We have 4 registers that can request the power well - * to be enabled, and it will only be disabled if none of the registers is - * requesting it to be enabled. - */ void intel_power_domains_init_hw(struct drm_i915_private *dev_priv) { /* For now, we need the power well to be always enabled. */ From e9dbd2b20201b49b04476d2e5763faa822967913 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 18 Feb 2014 19:10:24 +0200 Subject: [PATCH 036/107] drm/i915: Fix forcewake counts for gen8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sometimes generic driver code gets forcewake explicitly by gen6_gt_force_wake_get(), which check forcewake_count before accessing hardware. However the register access with gen8_write function access low level hw accessors directly, ignoring the forcewake_count. This leads to nested forcewake get from hardware, in ring init and possibly elsewhere, causing forcewake ack clear errors and/or hangs. Fix this by checking the forcewake count also in gen8_write v2: Read side doesn't care about shadowed registers, Remove __needs_put funkiness from gen8_write. (Ville) Improved commit message. References: https://bugs.freedesktop.org/show_bug.cgi?id=74007 Signed-off-by: Mika Kuoppala Cc: Ben Widawsky Cc: Ville Syrjälä Signed-off-by: Mika Kuoppala Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_uncore.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index c62841404c826..d1e9d63953c5a 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -634,16 +634,17 @@ static bool is_gen8_shadowed(struct drm_i915_private *dev_priv, u32 reg) #define __gen8_write(x) \ static void \ gen8_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, bool trace) { \ - bool __needs_put = reg < 0x40000 && !is_gen8_shadowed(dev_priv, reg); \ REG_WRITE_HEADER; \ - if (__needs_put) { \ - dev_priv->uncore.funcs.force_wake_get(dev_priv, \ - FORCEWAKE_ALL); \ - } \ - __raw_i915_write##x(dev_priv, reg, val); \ - if (__needs_put) { \ - dev_priv->uncore.funcs.force_wake_put(dev_priv, \ - FORCEWAKE_ALL); \ + if (reg < 0x40000 && !is_gen8_shadowed(dev_priv, reg)) { \ + if (dev_priv->uncore.forcewake_count == 0) \ + dev_priv->uncore.funcs.force_wake_get(dev_priv, \ + FORCEWAKE_ALL); \ + __raw_i915_write##x(dev_priv, reg, val); \ + if (dev_priv->uncore.forcewake_count == 0) \ + dev_priv->uncore.funcs.force_wake_put(dev_priv, \ + FORCEWAKE_ALL); \ + } else { \ + __raw_i915_write##x(dev_priv, reg, val); \ } \ REG_WRITE_FOOTER; \ } From f62a007603f86d36b920265bbf6ae0c698d882d8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 21 Feb 2014 17:55:39 +0000 Subject: [PATCH 037/107] drm/i915: Accurately track when we mark the hardware as idle/busy We currently call intel_mark_idle() too often, as we do so as a side-effect of processing the request queue. However, we the calls to intel_mark_idle() are expected to be paired with a call to intel_mark_busy() (or else we try to idle the hardware by accessing registers that are already disabled). Make the idle/busy tracking explicit to prevent the multiple calls. v2: We can drop some of the complexity in __i915_add_request() as queue_delayed_work() already behaves as we want (not requeuing the item if it is already in the queue) and mark_busy/mark_idle imply that the idle task is inactive. v3: We do still need to cancel the pending idle task so that it is sent again after the current busy load completes (not in the middle of it). Reported-by: Paulo Zanoni Signed-off-by: Chris Wilson Cc: Paulo Zanoni Reviewed-by: Paulo Zanoni Tested-by: Paulo Zanoni Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 8 ++++++++ drivers/gpu/drm/i915/i915_gem.c | 14 +++++--------- drivers/gpu/drm/i915/intel_display.c | 9 +++++++++ 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 944c6174cd423..0d1e8bebaecd6 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1124,6 +1124,14 @@ struct i915_gem_mm { */ bool interruptible; + /** + * Is the GPU currently considered idle, or busy executing userspace + * requests? Whilst idle, we attempt to power down the hardware and + * display clocks. In order to reduce the effect on performance, there + * is a slight delay before we do so. + */ + bool busy; + /** Bit 6 swizzling required for X tiling */ uint32_t bit_6_swizzle_x; /** Bit 6 swizzling required for Y tiling */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3618bb0cda0a8..6978e692cd82d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2148,7 +2148,6 @@ int __i915_add_request(struct intel_ring_buffer *ring, drm_i915_private_t *dev_priv = ring->dev->dev_private; struct drm_i915_gem_request *request; u32 request_ring_position, request_start; - int was_empty; int ret; request_start = intel_ring_get_tail(ring); @@ -2199,7 +2198,6 @@ int __i915_add_request(struct intel_ring_buffer *ring, i915_gem_context_reference(request->ctx); request->emitted_jiffies = jiffies; - was_empty = list_empty(&ring->request_list); list_add_tail(&request->list, &ring->request_list); request->file_priv = NULL; @@ -2220,13 +2218,11 @@ int __i915_add_request(struct intel_ring_buffer *ring, if (!dev_priv->ums.mm_suspended) { i915_queue_hangcheck(ring->dev); - if (was_empty) { - cancel_delayed_work_sync(&dev_priv->mm.idle_work); - queue_delayed_work(dev_priv->wq, - &dev_priv->mm.retire_work, - round_jiffies_up_relative(HZ)); - intel_mark_busy(dev_priv->dev); - } + cancel_delayed_work_sync(&dev_priv->mm.idle_work); + queue_delayed_work(dev_priv->wq, + &dev_priv->mm.retire_work, + round_jiffies_up_relative(HZ)); + intel_mark_busy(dev_priv->dev); } if (out_seqno) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 842b07ed742a7..ef26312e98dd7 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -8197,8 +8197,12 @@ void intel_mark_busy(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; + if (dev_priv->mm.busy) + return; + hsw_package_c8_gpu_busy(dev_priv); i915_update_gfx_val(dev_priv); + dev_priv->mm.busy = true; } void intel_mark_idle(struct drm_device *dev) @@ -8206,6 +8210,11 @@ void intel_mark_idle(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; struct drm_crtc *crtc; + if (!dev_priv->mm.busy) + return; + + dev_priv->mm.busy = false; + hsw_package_c8_gpu_idle(dev_priv); if (!i915.powersave) From 8f94d24b7b3191fc8a6cf16c9d815410f5e09ae3 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Thu, 20 Feb 2014 16:01:20 -0800 Subject: [PATCH 038/107] drm/i915/bdw: Add FBC support This got lost when we shuffled around our internal branch and GEN7_FEATURES macro. There were no HW changes to support FBC, so we just need to set the flag. v2: Don't allow FBC for any pipe but A on platforms with DDI. (Paulo) Cc: Daisy Sun Signed-off-by: Ben Widawsky Reviewed-by: Paulo Zanoni Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.c | 2 ++ drivers/gpu/drm/i915/intel_pm.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index c4abe877fe335..17c4466087a48 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -265,6 +265,7 @@ static const struct intel_device_info intel_broadwell_d_info = { .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, .has_llc = 1, .has_ddi = 1, + .has_fbc = 1, GEN_DEFAULT_PIPEOFFSETS, }; @@ -274,6 +275,7 @@ static const struct intel_device_info intel_broadwell_m_info = { .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, .has_llc = 1, .has_ddi = 1, + .has_fbc = 1, GEN_DEFAULT_PIPEOFFSETS, }; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 146cf83a93855..e8fc428485842 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -540,7 +540,7 @@ void intel_update_fbc(struct drm_device *dev) DRM_DEBUG_KMS("mode too large for compression, disabling\n"); goto out_disable; } - if ((INTEL_INFO(dev)->gen < 4 || IS_HASWELL(dev)) && + if ((INTEL_INFO(dev)->gen < 4 || HAS_DDI(dev)) && intel_crtc->plane != PLANE_A) { if (set_no_fbc_reason(dev_priv, FBC_BAD_PLANE)) DRM_DEBUG_KMS("plane not A, disabling compression\n"); From 8232644ccf099548710843e97360a3fcd6d28e04 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 5 Mar 2014 12:00:39 +0000 Subject: [PATCH 039/107] drm/i915: Convert the forcewake worker into a timer func MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't want to suffer scheduling delay when turning off the GPU after waking it up to touch registers. Ideally, we only want to keep the GPU awake for the register access sequence, with a single forcewake dance on the first access and release immediately after the last. We set a timer on the first access so that we only dance once and on the next scheduler tick, we drop the forcewake again. This moves the cleanup routine from the common i915 workqueue to a timer func so that we don't anger powertop, and drop the forcewake again quicker. v2: Enable the deferred force_wake_put for regular register reads as well. v3: Beautification and make sure we disable forcewake when shutting down. Signed-off-by: Chris Wilson Cc: Ben Widawsky Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/intel_uncore.c | 35 +++++++++++++---------------- 2 files changed, 17 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0d1e8bebaecd6..70a2d754f50c5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -499,7 +499,7 @@ struct intel_uncore { unsigned fw_rendercount; unsigned fw_mediacount; - struct delayed_work force_wake_work; + struct timer_list force_wake_timer; }; #define DEV_INFO_FOR_EACH_FLAG(func, sep) \ diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index d1e9d63953c5a..e5b59ac5151a5 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -289,10 +289,9 @@ void vlv_force_wake_put(struct drm_i915_private *dev_priv, spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } -static void gen6_force_wake_work(struct work_struct *work) +static void gen6_force_wake_timer(unsigned long arg) { - struct drm_i915_private *dev_priv = - container_of(work, typeof(*dev_priv), uncore.force_wake_work.work); + struct drm_i915_private *dev_priv = (void *)arg; unsigned long irqflags; spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); @@ -405,9 +404,8 @@ void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv, int fw_engine) spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); if (--dev_priv->uncore.forcewake_count == 0) { dev_priv->uncore.forcewake_count++; - mod_delayed_work(dev_priv->wq, - &dev_priv->uncore.force_wake_work, - 1); + mod_timer_pinned(&dev_priv->uncore.force_wake_timer, + jiffies + 1); } spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); @@ -484,17 +482,15 @@ gen5_read##x(struct drm_i915_private *dev_priv, off_t reg, bool trace) { \ static u##x \ gen6_read##x(struct drm_i915_private *dev_priv, off_t reg, bool trace) { \ REG_READ_HEADER(x); \ - if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \ - if (dev_priv->uncore.forcewake_count == 0) \ - dev_priv->uncore.funcs.force_wake_get(dev_priv, \ - FORCEWAKE_ALL); \ - val = __raw_i915_read##x(dev_priv, reg); \ - if (dev_priv->uncore.forcewake_count == 0) \ - dev_priv->uncore.funcs.force_wake_put(dev_priv, \ - FORCEWAKE_ALL); \ - } else { \ - val = __raw_i915_read##x(dev_priv, reg); \ + if (dev_priv->uncore.forcewake_count == 0 && \ + NEEDS_FORCE_WAKE((dev_priv), (reg))) { \ + dev_priv->uncore.funcs.force_wake_get(dev_priv, \ + FORCEWAKE_ALL); \ + dev_priv->uncore.forcewake_count++; \ + mod_timer_pinned(&dev_priv->uncore.force_wake_timer, \ + jiffies + 1); \ } \ + val = __raw_i915_read##x(dev_priv, reg); \ REG_READ_FOOTER; \ } @@ -682,8 +678,8 @@ void intel_uncore_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - INIT_DELAYED_WORK(&dev_priv->uncore.force_wake_work, - gen6_force_wake_work); + setup_timer(&dev_priv->uncore.force_wake_timer, + gen6_force_wake_timer, (unsigned long)dev_priv); if (IS_VALLEYVIEW(dev)) { dev_priv->uncore.funcs.force_wake_get = __vlv_force_wake_get; @@ -795,10 +791,11 @@ void intel_uncore_fini(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - flush_delayed_work(&dev_priv->uncore.force_wake_work); + del_timer_sync(&dev_priv->uncore.force_wake_timer); /* Paranoia: make sure we have disabled everything before we exit. */ intel_uncore_sanitize(dev); + intel_uncore_forcewake_reset(dev); } static const struct register_whitelist { From ea9a6bafcfdb4a7121d71dfb7090200d7260bca7 Mon Sep 17 00:00:00 2001 From: Shobhit Kumar Date: Fri, 28 Feb 2014 11:18:46 +0530 Subject: [PATCH 040/107] drm/i915: Update VBT data structures to have MIPI block enhancements MIPI Block #52 which provides configuration details for the MIPI panel including dphy settings as per panel and tcon specs Block #53 gives information on panel enable sequences v2: Address review comemnts from Jani - Move panel ids from intel_dsi.h to intel_bios.h - bdb_mipi_config structure improvements for cleaner code - Adding units for the pps delays, all in ms - change data structure to be more cleaner and simple v3: Corrected the unit for pps delays as 100us Signed-off-by: Shobhit Kumar Reviewed-by: Jani Nikula Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_bios.c | 4 +- drivers/gpu/drm/i915/intel_bios.h | 174 +++++++++++++++++++++++++----- 2 files changed, 147 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 86b95ca413d16..4867f4cc0938c 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -599,14 +599,14 @@ parse_mipi(struct drm_i915_private *dev_priv, struct bdb_header *bdb) { struct bdb_mipi *mipi; - mipi = find_section(bdb, BDB_MIPI); + mipi = find_section(bdb, BDB_MIPI_CONFIG); if (!mipi) { DRM_DEBUG_KMS("No MIPI BDB found"); return; } /* XXX: add more info */ - dev_priv->vbt.dsi.panel_id = mipi->panel_id; + dev_priv->vbt.dsi.panel_id = MIPI_DSI_GENERIC_PANEL_ID; } static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, diff --git a/drivers/gpu/drm/i915/intel_bios.h b/drivers/gpu/drm/i915/intel_bios.h index 282de5e9f39de..83b7629e4367b 100644 --- a/drivers/gpu/drm/i915/intel_bios.h +++ b/drivers/gpu/drm/i915/intel_bios.h @@ -104,7 +104,8 @@ struct vbios_data { #define BDB_LVDS_LFP_DATA 42 #define BDB_LVDS_BACKLIGHT 43 #define BDB_LVDS_POWER 44 -#define BDB_MIPI 50 +#define BDB_MIPI_CONFIG 52 +#define BDB_MIPI_SEQUENCE 53 #define BDB_SKIP 254 /* VBIOS private block, ignore */ struct bdb_general_features { @@ -711,44 +712,159 @@ int intel_parse_bios(struct drm_device *dev); #define DVO_PORT_DPD 9 #define DVO_PORT_DPA 10 -/* MIPI DSI panel info */ -struct bdb_mipi { - u16 panel_id; - u16 bridge_revision; - - /* General params */ - u32 dithering:1; - u32 bpp_pixel_format:1; - u32 rsvd1:1; - u32 dphy_valid:1; - u32 resvd2:28; +/* Block 52 contains MIPI Panel info + * 6 such enteries will there. Index into correct + * entery is based on the panel_index in #40 LFP + */ +#define MAX_MIPI_CONFIGURATIONS 6 - u16 port_info; - u16 rsvd3:2; - u16 num_lanes:2; - u16 rsvd4:12; +#define MIPI_DSI_UNDEFINED_PANEL_ID 0 +#define MIPI_DSI_GENERIC_PANEL_ID 1 - /* DSI config */ - u16 virt_ch_num:2; - u16 vtm:2; - u16 rsvd5:12; +struct mipi_config { + u16 panel_id; - u32 dsi_clock; + /* General Params */ + u32 enable_dithering:1; + u32 rsvd1:1; + u32 is_bridge:1; + + u32 panel_arch_type:2; + u32 is_cmd_mode:1; + +#define NON_BURST_SYNC_PULSE 0x1 +#define NON_BURST_SYNC_EVENTS 0x2 +#define BURST_MODE 0x3 + u32 video_transfer_mode:2; + + u32 cabc_supported:1; + u32 pwm_blc:1; + + /* Bit 13:10 */ +#define PIXEL_FORMAT_RGB565 0x1 +#define PIXEL_FORMAT_RGB666 0x2 +#define PIXEL_FORMAT_RGB666_LOOSELY_PACKED 0x3 +#define PIXEL_FORMAT_RGB888 0x4 + u32 videomode_color_format:4; + + /* Bit 15:14 */ +#define ENABLE_ROTATION_0 0x0 +#define ENABLE_ROTATION_90 0x1 +#define ENABLE_ROTATION_180 0x2 +#define ENABLE_ROTATION_270 0x3 + u32 rotation:2; + u32 bta_enabled:1; + u32 rsvd2:15; + + /* 2 byte Port Description */ +#define DUAL_LINK_NOT_SUPPORTED 0 +#define DUAL_LINK_FRONT_BACK 1 +#define DUAL_LINK_PIXEL_ALT 2 + u16 dual_link:2; + u16 lane_cnt:2; + u16 rsvd3:12; + + u16 rsvd4; + + u8 rsvd5[5]; + u32 dsi_ddr_clk; u32 bridge_ref_clk; - u16 rsvd_pwr; - /* Dphy Params */ - u32 prepare_cnt:5; - u32 rsvd6:3; +#define BYTE_CLK_SEL_20MHZ 0 +#define BYTE_CLK_SEL_10MHZ 1 +#define BYTE_CLK_SEL_5MHZ 2 + u8 byte_clk_sel:2; + + u8 rsvd6:6; + + /* DPHY Flags */ + u16 dphy_param_valid:1; + u16 eot_pkt_disabled:1; + u16 enable_clk_stop:1; + u16 rsvd7:13; + + u32 hs_tx_timeout; + u32 lp_rx_timeout; + u32 turn_around_timeout; + u32 device_reset_timer; + u32 master_init_timer; + u32 dbi_bw_timer; + u32 lp_byte_clk_val; + + /* 4 byte Dphy Params */ + u32 prepare_cnt:6; + u32 rsvd8:2; u32 clk_zero_cnt:8; u32 trail_cnt:5; - u32 rsvd7:3; + u32 rsvd9:3; u32 exit_zero_cnt:6; - u32 rsvd8:2; + u32 rsvd10:2; - u32 hl_switch_cnt; - u32 lp_byte_clk; u32 clk_lane_switch_cnt; + u32 hl_switch_cnt; + + u32 rsvd11[6]; + + /* timings based on dphy spec */ + u8 tclk_miss; + u8 tclk_post; + u8 rsvd12; + u8 tclk_pre; + u8 tclk_prepare; + u8 tclk_settle; + u8 tclk_term_enable; + u8 tclk_trail; + u16 tclk_prepare_clkzero; + u8 rsvd13; + u8 td_term_enable; + u8 teot; + u8 ths_exit; + u8 ths_prepare; + u16 ths_prepare_hszero; + u8 rsvd14; + u8 ths_settle; + u8 ths_skip; + u8 ths_trail; + u8 tinit; + u8 tlpx; + u8 rsvd15[3]; + + /* GPIOs */ + u8 panel_enable; + u8 bl_enable; + u8 pwm_enable; + u8 reset_r_n; + u8 pwr_down_r; + u8 stdby_r_n; + } __packed; +/* Block 52 contains MIPI configuration block + * 6 * bdb_mipi_config, followed by 6 pps data + * block below + * + * all delays has a unit of 100us + */ +struct mipi_pps_data { + u16 panel_on_delay; + u16 bl_enable_delay; + u16 bl_disable_delay; + u16 panel_off_delay; + u16 panel_power_cycle_delay; +}; + +struct bdb_mipi_config { + struct mipi_config config[MAX_MIPI_CONFIGURATIONS]; + struct mipi_pps_data pps[MAX_MIPI_CONFIGURATIONS]; +}; + +/* Block 53 contains MIPI sequences as needed by the panel + * for enabling it. This block can be variable in size and + * can be maximum of 6 blocks + */ +struct bdb_mipi_sequence { + u8 version; + u8 data[0]; +}; + #endif /* _I830_BIOS_H_ */ From f103fc7db7c05eee7a0efc3c1e0938a40367c45c Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Thu, 20 Feb 2014 12:39:57 -0800 Subject: [PATCH 041/107] drm/i915: print connector mode list in display_info Useful for bug reports. Signed-off-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index f3015034fd2d3..6e668fa13b982 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2182,6 +2182,7 @@ static void intel_connector_info(struct seq_file *m, { struct intel_connector *intel_connector = to_intel_connector(connector); struct intel_encoder *intel_encoder = intel_connector->encoder; + struct drm_display_mode *mode; seq_printf(m, "connector %d: type %s, status: %s\n", connector->base.id, drm_get_connector_name(connector), @@ -2204,6 +2205,9 @@ static void intel_connector_info(struct seq_file *m, else if (intel_encoder->type == INTEL_OUTPUT_LVDS) intel_lvds_info(m, intel_connector); + seq_printf(m, "\tmodes:\n"); + list_for_each_entry(mode, &connector->modes, head) + intel_seq_print_mode(m, 2, mode); } static int i915_display_info(struct seq_file *m, void *unused) From ccc7bed05e27a654db1e9e248ce5fb291c12add1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 21 Feb 2014 16:26:47 +0200 Subject: [PATCH 042/107] drm/i915: Don't ban default context when stop_rings!=0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we've explicitly stopped the rings for testing purposes, don't ban the default context. Fixes kms_flip hang tests. Signed-off-by: Ville Syrjälä Acked-by: Mika Kuoppala Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6978e692cd82d..0ec1080b19123 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2255,14 +2255,13 @@ static bool i915_context_is_banned(struct drm_i915_private *dev_priv, return true; if (elapsed <= DRM_I915_CTX_BAN_PERIOD) { - if (dev_priv->gpu_error.stop_rings == 0 && - i915_gem_context_is_default(ctx)) { - DRM_ERROR("gpu hanging too fast, banning!\n"); - } else { + if (!i915_gem_context_is_default(ctx)) { DRM_DEBUG("context hanging too fast, banning!\n"); + return true; + } else if (dev_priv->gpu_error.stop_rings == 0) { + DRM_ERROR("gpu hanging too fast, banning!\n"); + return true; } - - return true; } return false; From bb4cdd5345bf344d978e18835583641f2c156ce5 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Fri, 21 Feb 2014 13:52:19 -0300 Subject: [PATCH 043/107] drm/i915: put runtime PM only at the end of intel_mark_idle Because intel_mark_idle still touches some registers: it needs the machine to be awake. If you set both the autosuspend and PC8 delays to zero, you can get a "Device suspended" WARN when gen6_rps_idle touches registers. This is not easy to reproduce, but happens once in a while when running pm_pc8. Testcase: igt/pm_pc8 Signed-off-by: Paulo Zanoni Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ef26312e98dd7..82ef8bf6243c2 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -8215,10 +8215,8 @@ void intel_mark_idle(struct drm_device *dev) dev_priv->mm.busy = false; - hsw_package_c8_gpu_idle(dev_priv); - if (!i915.powersave) - return; + goto out; list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { if (!crtc->fb) @@ -8229,6 +8227,9 @@ void intel_mark_idle(struct drm_device *dev) if (INTEL_INFO(dev)->gen >= 6) gen6_rps_idle(dev->dev_private); + +out: + hsw_package_c8_gpu_idle(dev_priv); } void intel_mark_fb_busy(struct drm_i915_gem_object *obj, From 6d88064edcfc5e5893371f7c06b9f3078dc1edf6 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Fri, 21 Feb 2014 17:58:29 -0300 Subject: [PATCH 044/107] drm/i915: put runtime PM only when we actually release force_wake When we call gen6_gt_force_wake_put we don't actually put force_wake, we just schedule gen6_force_wake_work through mod_delayed_work, and that will eventually release force_wake. The problem is that we call intel_runtime_pm_put directly at gen6_gt_force_wake_put, so most of the times we put our runtime PM reference before the delayed work happens, so we may runtime suspend while force_wake is still supposed to be enabled if the graphics autosuspend_delay_ms is too small. Now the nice thing about the current code is that after it triggers the delayed work function it gets a refcount, and it only triggers the delayed work function if refcount is zero. This guarantees that when we schedule the funciton, it will run before we try to schedule it again, which simplifies the problem and allows for the current solution to work properly (hopefully!). v2: - Keep the VLV refcounts balanced (Jesse) Signed-off-by: Paulo Zanoni Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_uncore.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index e5b59ac5151a5..4c39e241c98e5 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -298,6 +298,8 @@ static void gen6_force_wake_timer(unsigned long arg) if (--dev_priv->uncore.forcewake_count == 0) dev_priv->uncore.funcs.force_wake_put(dev_priv, FORCEWAKE_ALL); spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); + + intel_runtime_pm_put(dev_priv); } static void intel_uncore_forcewake_reset(struct drm_device *dev) @@ -392,24 +394,30 @@ void gen6_gt_force_wake_get(struct drm_i915_private *dev_priv, int fw_engine) void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv, int fw_engine) { unsigned long irqflags; + bool delayed = false; if (!dev_priv->uncore.funcs.force_wake_put) return; /* Redirect to VLV specific routine */ - if (IS_VALLEYVIEW(dev_priv->dev)) - return vlv_force_wake_put(dev_priv, fw_engine); + if (IS_VALLEYVIEW(dev_priv->dev)) { + vlv_force_wake_put(dev_priv, fw_engine); + goto out; + } spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); if (--dev_priv->uncore.forcewake_count == 0) { dev_priv->uncore.forcewake_count++; + delayed = true; mod_timer_pinned(&dev_priv->uncore.force_wake_timer, jiffies + 1); } spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); - intel_runtime_pm_put(dev_priv); +out: + if (!delayed) + intel_runtime_pm_put(dev_priv); } /* We give fast paths for the really cool registers */ From c19a0df2ac0c613614417bba7ffec8daa248bb82 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Fri, 21 Feb 2014 13:52:22 -0300 Subject: [PATCH 045/107] drm/i915: get runtime PM while trying to detect CRT Otherwise we'll read registers that return 0xffffffff, trigger some WARNs, think CRT is actually connected (because certain bits are 1), and fail the drm-resources-equal testcase! Tested on a SNB machine with runtime PM support (which is not upstream yet, but is already on my public tree at freedesktop.org, and will hopefully eventually become upstream). Testcase: igt/pm_pc8/drm-resources-equal Signed-off-by: Paulo Zanoni Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_crt.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 9864aa1ccbe82..4c1230c737d5d 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -630,10 +630,13 @@ static enum drm_connector_status intel_crt_detect(struct drm_connector *connector, bool force) { struct drm_device *dev = connector->dev; + struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crt *crt = intel_attached_crt(connector); enum drm_connector_status status; struct intel_load_detect_pipe tmp; + intel_runtime_pm_get(dev_priv); + DRM_DEBUG_KMS("[CONNECTOR:%d:%s] force=%d\n", connector->base.id, drm_get_connector_name(connector), force); @@ -645,23 +648,30 @@ intel_crt_detect(struct drm_connector *connector, bool force) */ if (intel_crt_detect_hotplug(connector)) { DRM_DEBUG_KMS("CRT detected via hotplug\n"); - return connector_status_connected; + status = connector_status_connected; + goto out; } else DRM_DEBUG_KMS("CRT not detected via hotplug\n"); } - if (intel_crt_detect_ddc(connector)) - return connector_status_connected; + if (intel_crt_detect_ddc(connector)) { + status = connector_status_connected; + goto out; + } /* Load detection is broken on HPD capable machines. Whoever wants a * broken monitor (without edid) to work behind a broken kvm (that fails * to have the right resistors for HP detection) needs to fix this up. * For now just bail out. */ - if (I915_HAS_HOTPLUG(dev)) - return connector_status_disconnected; + if (I915_HAS_HOTPLUG(dev)) { + status = connector_status_disconnected; + goto out; + } - if (!force) - return connector->status; + if (!force) { + status = connector->status; + goto out; + } /* for pre-945g platforms use load detect */ if (intel_get_load_detect_pipe(connector, NULL, &tmp)) { @@ -673,6 +683,8 @@ intel_crt_detect(struct drm_connector *connector, bool force) } else status = connector_status_unknown; +out: + intel_runtime_pm_put(dev_priv); return status; } From 36623ef8375e1040ed72f36c1e62371f1c14ceb4 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Fri, 21 Feb 2014 13:52:23 -0300 Subject: [PATCH 046/107] drm/i915: get/put runtime PM in more places at i915_debugfs.c These are places where we read (not write) registers while we're runtime suspended. Signed-off-by: Paulo Zanoni Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 6e668fa13b982..8c5e3d0d472f8 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1348,6 +1348,8 @@ static int i915_fbc_status(struct seq_file *m, void *unused) return 0; } + intel_runtime_pm_get(dev_priv); + if (intel_fbc_enabled(dev)) { seq_puts(m, "FBC enabled\n"); } else { @@ -1391,6 +1393,9 @@ static int i915_fbc_status(struct seq_file *m, void *unused) } seq_putc(m, '\n'); } + + intel_runtime_pm_put(dev_priv); + return 0; } @@ -1405,11 +1410,15 @@ static int i915_ips_status(struct seq_file *m, void *unused) return 0; } + intel_runtime_pm_get(dev_priv); + if (IS_BROADWELL(dev) || I915_READ(IPS_CTL) & IPS_ENABLE) seq_puts(m, "enabled\n"); else seq_puts(m, "disabled\n"); + intel_runtime_pm_put(dev_priv); + return 0; } @@ -1420,6 +1429,8 @@ static int i915_sr_status(struct seq_file *m, void *unused) drm_i915_private_t *dev_priv = dev->dev_private; bool sr_enabled = false; + intel_runtime_pm_get(dev_priv); + if (HAS_PCH_SPLIT(dev)) sr_enabled = I915_READ(WM1_LP_ILK) & WM1_LP_SR_EN; else if (IS_CRESTLINE(dev) || IS_I945G(dev) || IS_I945GM(dev)) @@ -1429,6 +1440,8 @@ static int i915_sr_status(struct seq_file *m, void *unused) else if (IS_PINEVIEW(dev)) sr_enabled = I915_READ(DSPFW3) & PINEVIEW_SELF_REFRESH_EN; + intel_runtime_pm_put(dev_priv); + seq_printf(m, "self-refresh: %s\n", sr_enabled ? "enabled" : "disabled"); @@ -1974,12 +1987,16 @@ static int i915_energy_uJ(struct seq_file *m, void *data) if (INTEL_INFO(dev)->gen < 6) return -ENODEV; + intel_runtime_pm_get(dev_priv); + rdmsrl(MSR_RAPL_POWER_UNIT, power); power = (power & 0x1f00) >> 8; units = 1000000 / (1 << power); /* convert to uJ */ power = I915_READ(MCH_SECP_NRG_STTS); power *= units; + intel_runtime_pm_put(dev_priv); + seq_printf(m, "%llu", (long long unsigned)power); return 0; From 86c4ec0d32762cff8c49daf10fd83bceb4fbec0e Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Fri, 21 Feb 2014 13:52:24 -0300 Subject: [PATCH 047/107] drm/i915: kill dev_priv->pc8.gpu_idle Since the addition of dev_priv->mm.busy, there's no more need for dev_priv->pc8.gpu_idle, so kill it. Notice that when you remove gpu_idle, hsw_package_c8_gpu_idle and hsw_package_c8_gpu_busy become identical to hsw_enable_package_c8 and hsw_disable_package_c8, so just use them. Also, when we boot the machine, dev_priv->mm.busy initially considers the machine as idle. This is opposed to dev_priv->pc8.gpu_idle, which considered it busy. So dev_priv->pc8.disable_count has to be initalized to 1 now. Signed-off-by: Paulo Zanoni Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 10 ++++------ drivers/gpu/drm/i915/intel_display.c | 30 ++-------------------------- drivers/gpu/drm/i915/intel_pm.c | 3 +-- 4 files changed, 8 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 8c5e3d0d472f8..aef779b4193d0 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2016,7 +2016,7 @@ static int i915_pc8_status(struct seq_file *m, void *unused) mutex_lock(&dev_priv->pc8.lock); seq_printf(m, "Requirements met: %s\n", yesno(dev_priv->pc8.requirements_met)); - seq_printf(m, "GPU idle: %s\n", yesno(dev_priv->pc8.gpu_idle)); + seq_printf(m, "GPU idle: %s\n", yesno(!dev_priv->mm.busy)); seq_printf(m, "Disable count: %d\n", dev_priv->pc8.disable_count); seq_printf(m, "IRQs disabled: %s\n", yesno(dev_priv->pc8.irqs_disabled)); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 70a2d754f50c5..6503a5c513855 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1321,11 +1321,10 @@ struct ilk_wm_values { * Ideally every piece of our code that needs PC8+ disabled would call * hsw_disable_package_c8, which would increment disable_count and prevent the * system from reaching PC8+. But we don't have a symmetric way to do this for - * everything, so we have the requirements_met and gpu_idle variables. When we - * switch requirements_met or gpu_idle to true we decrease disable_count, and - * increase it in the opposite case. The requirements_met variable is true when - * all the CRTCs, encoders and the power well are disabled. The gpu_idle - * variable is true when the GPU is idle. + * everything, so we have the requirements_met variable. When we switch + * requirements_met to true we decrease disable_count, and increase it in the + * opposite case. The requirements_met variable is true when all the CRTCs, + * encoders and the power well are disabled. * * In addition to everything, we only actually enable PC8+ if disable_count * stays at zero for at least some seconds. This is implemented with the @@ -1348,7 +1347,6 @@ struct ilk_wm_values { */ struct i915_package_c8 { bool requirements_met; - bool gpu_idle; bool irqs_disabled; /* Only true after the delayed work task actually enables it. */ bool enabled; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 82ef8bf6243c2..680f4c7bd62df 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -6817,32 +6817,6 @@ static void hsw_update_package_c8(struct drm_device *dev) mutex_unlock(&dev_priv->pc8.lock); } -static void hsw_package_c8_gpu_idle(struct drm_i915_private *dev_priv) -{ - if (!HAS_PC8(dev_priv->dev)) - return; - - mutex_lock(&dev_priv->pc8.lock); - if (!dev_priv->pc8.gpu_idle) { - dev_priv->pc8.gpu_idle = true; - __hsw_enable_package_c8(dev_priv); - } - mutex_unlock(&dev_priv->pc8.lock); -} - -static void hsw_package_c8_gpu_busy(struct drm_i915_private *dev_priv) -{ - if (!HAS_PC8(dev_priv->dev)) - return; - - mutex_lock(&dev_priv->pc8.lock); - if (dev_priv->pc8.gpu_idle) { - dev_priv->pc8.gpu_idle = false; - __hsw_disable_package_c8(dev_priv); - } - mutex_unlock(&dev_priv->pc8.lock); -} - #define for_each_power_domain(domain, mask) \ for ((domain) = 0; (domain) < POWER_DOMAIN_NUM; (domain)++) \ if ((1 << (domain)) & (mask)) @@ -8200,7 +8174,7 @@ void intel_mark_busy(struct drm_device *dev) if (dev_priv->mm.busy) return; - hsw_package_c8_gpu_busy(dev_priv); + hsw_disable_package_c8(dev_priv); i915_update_gfx_val(dev_priv); dev_priv->mm.busy = true; } @@ -8229,7 +8203,7 @@ void intel_mark_idle(struct drm_device *dev) gen6_rps_idle(dev->dev_private); out: - hsw_package_c8_gpu_idle(dev_priv); + hsw_enable_package_c8(dev_priv); } void intel_mark_fb_busy(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index e8fc428485842..2ded0f6d543be 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5809,10 +5809,9 @@ void intel_pm_setup(struct drm_device *dev) mutex_init(&dev_priv->pc8.lock); dev_priv->pc8.requirements_met = false; - dev_priv->pc8.gpu_idle = false; dev_priv->pc8.irqs_disabled = false; dev_priv->pc8.enabled = false; - dev_priv->pc8.disable_count = 2; /* requirements_met + gpu_idle */ + dev_priv->pc8.disable_count = 1; /* requirements_met */ INIT_DELAYED_WORK(&dev_priv->pc8.enable_work, hsw_enable_pc8_work); INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work, intel_gen6_powersave_work); From b2ec142cb0101f298f8e091c7d75b1ec5b809b65 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Fri, 21 Feb 2014 13:52:25 -0300 Subject: [PATCH 048/107] drm/i915: call assert_device_not_suspended at gen6_force_wake_work Because we shouldn't be runtime suspended when forcewake is supposed to be enabled. Signed-off-by: Paulo Zanoni Reviewed-by: Imre Deak [danvet: Update commit message - no WARN expected since the bugfix for issues hit with this assert is already in. And resolve conflicts with the change from worker to timer for the delayed fw release.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_uncore.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 4c39e241c98e5..467d5687e129f 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -40,6 +40,12 @@ #define __raw_posting_read(dev_priv__, reg__) (void)__raw_i915_read32(dev_priv__, reg__) +static void +assert_device_not_suspended(struct drm_i915_private *dev_priv) +{ + WARN(HAS_RUNTIME_PM(dev_priv->dev) && dev_priv->pm.suspended, + "Device suspended\n"); +} static void __gen6_gt_wait_for_thread_c0(struct drm_i915_private *dev_priv) { @@ -294,6 +300,8 @@ static void gen6_force_wake_timer(unsigned long arg) struct drm_i915_private *dev_priv = (void *)arg; unsigned long irqflags; + assert_device_not_suspended(dev_priv); + spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); if (--dev_priv->uncore.forcewake_count == 0) dev_priv->uncore.funcs.force_wake_put(dev_priv, FORCEWAKE_ALL); @@ -452,13 +460,6 @@ hsw_unclaimed_reg_check(struct drm_i915_private *dev_priv, u32 reg) } } -static void -assert_device_not_suspended(struct drm_i915_private *dev_priv) -{ - WARN(HAS_RUNTIME_PM(dev_priv->dev) && dev_priv->pm.suspended, - "Device suspended\n"); -} - #define REG_READ_HEADER(x) \ unsigned long irqflags; \ u##x val = 0; \ From e998c40fed02c258404d11dcf66409c390e3ef9a Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Fri, 21 Feb 2014 13:52:26 -0300 Subject: [PATCH 049/107] drm/i915: assert force wake is disabled when we runtime suspend Just to be sure... Signed-off-by: Paulo Zanoni Reviewed-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.c | 1 + drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_uncore.c | 8 ++++++++ 3 files changed, 10 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 17c4466087a48..70a4c9bb7b809 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -849,6 +849,7 @@ static int i915_runtime_suspend(struct device *device) struct drm_i915_private *dev_priv = dev->dev_private; WARN_ON(!HAS_RUNTIME_PM(dev)); + assert_force_wake_inactive(dev_priv); DRM_DEBUG_KMS("Suspending device\n"); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6503a5c513855..3fe5a35f98a76 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2561,6 +2561,7 @@ extern void intel_display_print_error_state(struct drm_i915_error_state_buf *e, */ void gen6_gt_force_wake_get(struct drm_i915_private *dev_priv, int fw_engine); void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv, int fw_engine); +void assert_force_wake_inactive(struct drm_i915_private *dev_priv); int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u8 mbox, u32 *val); int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u8 mbox, u32 val); diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 467d5687e129f..8cb0a33054a66 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -428,6 +428,14 @@ void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv, int fw_engine) intel_runtime_pm_put(dev_priv); } +void assert_force_wake_inactive(struct drm_i915_private *dev_priv) +{ + if (!dev_priv->uncore.funcs.force_wake_get) + return; + + WARN_ON(dev_priv->uncore.forcewake_count > 0); +} + /* We give fast paths for the really cool registers */ #define NEEDS_FORCE_WAKE(dev_priv, reg) \ ((reg) < 0x40000 && (reg) != FORCEWAKE) From 6f0ea9e212b36fe831f104ab2ac7582b9741600a Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Fri, 21 Feb 2014 13:52:28 -0300 Subject: [PATCH 050/107] drm/i915: assert we're not runtime suspended when accessing registers I could swear this was already happening in the current code... Also, put the reads and writes in a generic place, so we don't forget it again when we add runtime PM support to new platforms. Signed-off-by: Paulo Zanoni Reviewed-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_uncore.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 8cb0a33054a66..e5ee65655e8c9 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -471,6 +471,7 @@ hsw_unclaimed_reg_check(struct drm_i915_private *dev_priv, u32 reg) #define REG_READ_HEADER(x) \ unsigned long irqflags; \ u##x val = 0; \ + assert_device_not_suspended(dev_priv); \ spin_lock_irqsave(&dev_priv->uncore.lock, irqflags) #define REG_READ_FOOTER \ @@ -567,6 +568,7 @@ __gen4_read(64) #define REG_WRITE_HEADER \ unsigned long irqflags; \ trace_i915_reg_rw(true, reg, val, sizeof(val), trace); \ + assert_device_not_suspended(dev_priv); \ spin_lock_irqsave(&dev_priv->uncore.lock, irqflags) #define REG_WRITE_FOOTER \ @@ -597,7 +599,6 @@ gen6_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, bool trace if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \ __fifo_ret = __gen6_gt_wait_for_fifo(dev_priv); \ } \ - assert_device_not_suspended(dev_priv); \ __raw_i915_write##x(dev_priv, reg, val); \ if (unlikely(__fifo_ret)) { \ gen6_gt_check_fifodbg(dev_priv); \ @@ -613,7 +614,6 @@ hsw_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, bool trace) if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \ __fifo_ret = __gen6_gt_wait_for_fifo(dev_priv); \ } \ - assert_device_not_suspended(dev_priv); \ hsw_unclaimed_reg_clear(dev_priv, reg); \ __raw_i915_write##x(dev_priv, reg, val); \ if (unlikely(__fifo_ret)) { \ From f900db4758ac251e9a9d31d32d48551cab071479 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 20 Feb 2014 09:26:13 +0000 Subject: [PATCH 051/107] drm/i915: Perform pageflip using mmio if the GPU is terminally wedged MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After a hang and failed reset, we cannot use the GPU to execute the page flip instructions. Instead we can force a synchronous mmio flip. (Later, we can reduce the synchronicity of the mmio flip by moving some of the delays off to a worker, like the current page flip code; see vblank tasks.) References: https://bugs.freedesktop.org/show_bug.cgi?id=72631 Signed-off-by: Chris Wilson Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 680f4c7bd62df..45bd176b68b58 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -8653,6 +8653,9 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, fb->pitches[0] != crtc->fb->pitches[0])) return -EINVAL; + if (i915_terminally_wedged(&dev_priv->gpu_error)) + goto out_hang; + work = kzalloc(sizeof(*work), GFP_KERNEL); if (work == NULL) return -ENOMEM; @@ -8727,6 +8730,13 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, free_work: kfree(work); + if (ret == -EIO) { +out_hang: + intel_crtc_wait_for_pending_flips(crtc); + ret = intel_pipe_set_base(crtc, crtc->x, crtc->y, fb); + if (ret == 0 && event) + drm_send_vblank_event(dev, intel_crtc->pipe, event); + } return ret; } From ee7fa12ce4683e92e3ab0c43c36af5fb5f6b1054 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 24 Feb 2014 17:02:08 +0200 Subject: [PATCH 052/107] drm/i915: Fix VLV forcewake after reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the render/media specific forcewake counts to properly restore the forcewake status after a GPU reset on VLV. Signed-off-by: Ville Syrjälä Reviewed-by: Deepak S Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_uncore.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index e5ee65655e8c9..ba465f9cc2f2a 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -982,10 +982,22 @@ static int gen6_do_reset(struct drm_device *dev) intel_uncore_forcewake_reset(dev); /* If reset with a user forcewake, try to restore, otherwise turn it off */ - if (dev_priv->uncore.forcewake_count) - dev_priv->uncore.funcs.force_wake_get(dev_priv, FORCEWAKE_ALL); - else - dev_priv->uncore.funcs.force_wake_put(dev_priv, FORCEWAKE_ALL); + if (IS_VALLEYVIEW(dev)) { + if (dev_priv->uncore.fw_rendercount) + dev_priv->uncore.funcs.force_wake_get(dev_priv, FORCEWAKE_RENDER); + else + dev_priv->uncore.funcs.force_wake_put(dev_priv, FORCEWAKE_RENDER); + + if (dev_priv->uncore.fw_mediacount) + dev_priv->uncore.funcs.force_wake_get(dev_priv, FORCEWAKE_MEDIA); + else + dev_priv->uncore.funcs.force_wake_put(dev_priv, FORCEWAKE_MEDIA); + } else { + if (dev_priv->uncore.forcewake_count) + dev_priv->uncore.funcs.force_wake_get(dev_priv, FORCEWAKE_ALL); + else + dev_priv->uncore.funcs.force_wake_put(dev_priv, FORCEWAKE_ALL); + } /* Restore fifo count */ dev_priv->uncore.fifo_count = __raw_i915_read32(dev_priv, GTFIFOCTL) & GT_FIFO_FREE_ENTRIES_MASK; From fc9d83f7475ec3164ee80582555f4fe8a7847319 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 24 Feb 2014 17:02:09 +0200 Subject: [PATCH 053/107] drm/i915: Drop the forcewake count inc/dec around register read on VLV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VLV is the only platform where we increment/decrement the forcewake count around register access. Drop the inc/dec on VLV to make the forcewake code a bit more unified. The inc/dec are not necessary since we hold the uncore lock around the whole operation. Signed-off-by: Ville Syrjälä Reviewed-by: Deepak S Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_uncore.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index ba465f9cc2f2a..9d25edfb8a92f 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -516,22 +516,22 @@ gen6_read##x(struct drm_i915_private *dev_priv, off_t reg, bool trace) { \ static u##x \ vlv_read##x(struct drm_i915_private *dev_priv, off_t reg, bool trace) { \ unsigned fwengine = 0; \ - unsigned *fwcount; \ + unsigned fwcount; \ REG_READ_HEADER(x); \ if (FORCEWAKE_VLV_RENDER_RANGE_OFFSET(reg)) { \ fwengine = FORCEWAKE_RENDER; \ - fwcount = &dev_priv->uncore.fw_rendercount; \ + fwcount = dev_priv->uncore.fw_rendercount; \ } \ else if (FORCEWAKE_VLV_MEDIA_RANGE_OFFSET(reg)) { \ fwengine = FORCEWAKE_MEDIA; \ - fwcount = &dev_priv->uncore.fw_mediacount; \ + fwcount = dev_priv->uncore.fw_mediacount; \ } \ if (fwengine != 0) { \ - if ((*fwcount)++ == 0) \ + if (fwcount == 0) \ (dev_priv)->uncore.funcs.force_wake_get(dev_priv, \ fwengine); \ val = __raw_i915_read##x(dev_priv, reg); \ - if (--(*fwcount) == 0) \ + if (fwcount == 0) \ (dev_priv)->uncore.funcs.force_wake_put(dev_priv, \ fwengine); \ } else { \ From 6fe7286530d9b9ce13421e3628bf564e896662a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 27 Feb 2014 22:07:21 +0200 Subject: [PATCH 054/107] drm/i915: Streamline VLV forcewake handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It occured to me that when we're trying to wake up both render and media wells on VLV, we might end up calling the low level force_wake_get/put two times even though one call would be enough. Make that happen by figuring out which wells really need to be woken up based on the forcewake counts. Signed-off-by: Ville Syrjälä Reviewed-by:Deepak S Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_uncore.c | 70 ++++++++++++----------------- 1 file changed, 29 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 9d25edfb8a92f..04bd9717b021f 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -257,16 +257,16 @@ void vlv_force_wake_get(struct drm_i915_private *dev_priv, unsigned long irqflags; spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); - if (FORCEWAKE_RENDER & fw_engine) { - if (dev_priv->uncore.fw_rendercount++ == 0) - dev_priv->uncore.funcs.force_wake_get(dev_priv, - FORCEWAKE_RENDER); - } - if (FORCEWAKE_MEDIA & fw_engine) { - if (dev_priv->uncore.fw_mediacount++ == 0) - dev_priv->uncore.funcs.force_wake_get(dev_priv, - FORCEWAKE_MEDIA); - } + + if (fw_engine & FORCEWAKE_RENDER && + dev_priv->uncore.fw_rendercount++ != 0) + fw_engine &= ~FORCEWAKE_RENDER; + if (fw_engine & FORCEWAKE_MEDIA && + dev_priv->uncore.fw_mediacount++ != 0) + fw_engine &= ~FORCEWAKE_MEDIA; + + if (fw_engine) + dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_engine); spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } @@ -278,19 +278,15 @@ void vlv_force_wake_put(struct drm_i915_private *dev_priv, spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); - if (FORCEWAKE_RENDER & fw_engine) { - WARN_ON(dev_priv->uncore.fw_rendercount == 0); - if (--dev_priv->uncore.fw_rendercount == 0) - dev_priv->uncore.funcs.force_wake_put(dev_priv, - FORCEWAKE_RENDER); - } + if (fw_engine & FORCEWAKE_RENDER && + --dev_priv->uncore.fw_rendercount != 0) + fw_engine &= ~FORCEWAKE_RENDER; + if (fw_engine & FORCEWAKE_MEDIA && + --dev_priv->uncore.fw_mediacount != 0) + fw_engine &= ~FORCEWAKE_MEDIA; - if (FORCEWAKE_MEDIA & fw_engine) { - WARN_ON(dev_priv->uncore.fw_mediacount == 0); - if (--dev_priv->uncore.fw_mediacount == 0) - dev_priv->uncore.funcs.force_wake_put(dev_priv, - FORCEWAKE_MEDIA); - } + if (fw_engine) + dev_priv->uncore.funcs.force_wake_put(dev_priv, fw_engine); spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } @@ -516,27 +512,19 @@ gen6_read##x(struct drm_i915_private *dev_priv, off_t reg, bool trace) { \ static u##x \ vlv_read##x(struct drm_i915_private *dev_priv, off_t reg, bool trace) { \ unsigned fwengine = 0; \ - unsigned fwcount; \ REG_READ_HEADER(x); \ - if (FORCEWAKE_VLV_RENDER_RANGE_OFFSET(reg)) { \ - fwengine = FORCEWAKE_RENDER; \ - fwcount = dev_priv->uncore.fw_rendercount; \ - } \ - else if (FORCEWAKE_VLV_MEDIA_RANGE_OFFSET(reg)) { \ - fwengine = FORCEWAKE_MEDIA; \ - fwcount = dev_priv->uncore.fw_mediacount; \ + if (FORCEWAKE_VLV_RENDER_RANGE_OFFSET(reg)) { \ + if (dev_priv->uncore.fw_rendercount == 0) \ + fwengine = FORCEWAKE_RENDER; \ + } else if (FORCEWAKE_VLV_MEDIA_RANGE_OFFSET(reg)) { \ + if (dev_priv->uncore.fw_mediacount == 0) \ + fwengine = FORCEWAKE_MEDIA; \ } \ - if (fwengine != 0) { \ - if (fwcount == 0) \ - (dev_priv)->uncore.funcs.force_wake_get(dev_priv, \ - fwengine); \ - val = __raw_i915_read##x(dev_priv, reg); \ - if (fwcount == 0) \ - (dev_priv)->uncore.funcs.force_wake_put(dev_priv, \ - fwengine); \ - } else { \ - val = __raw_i915_read##x(dev_priv, reg); \ - } \ + if (fwengine) \ + dev_priv->uncore.funcs.force_wake_get(dev_priv, fwengine); \ + val = __raw_i915_read##x(dev_priv, reg); \ + if (fwengine) \ + dev_priv->uncore.funcs.force_wake_put(dev_priv, fwengine); \ REG_READ_FOOTER; \ } From 64bf930379ac8097705db7d40602c2aa9ec0d2f4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 25 Feb 2014 14:23:28 +0000 Subject: [PATCH 055/107] drm/i915: Reset vma->mm_list after unbinding In place of true activity counting, we walk the list of vma associated with an object managing each on the vm's active/inactive list everytime we call move-to-inactive. This depends upon the vma->mm_list being cleared after unbinding, or else we run into difficulty when tracking the object in multiple vm's - we see a use-after free and corruption of the mm_list. Signed-off-by: Chris Wilson Cc: Ben Widawsky Reviewed-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 0ec1080b19123..b41ead6339637 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2739,7 +2739,7 @@ int i915_vma_unbind(struct i915_vma *vma) i915_gem_gtt_finish_object(obj); - list_del(&vma->mm_list); + list_del_init(&vma->mm_list); /* Avoid an unnecessary call to unbind on rebind. */ if (i915_is_ggtt(vma->vm)) obj->map_and_fenceable = true; From 8d9fc7fd2de6edc3b9c3f828a701bfa6891987e7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 25 Feb 2014 17:11:23 +0200 Subject: [PATCH 056/107] drm/i915: Rely on accurate request tracking for finding hung batches In the past, it was possible to have multiple batches per request due to a stray signal or ENOMEM. As a result we had to scan each active object (filtered by those having the COMMAND domain) for the one that contained the ACTHD pointer. This was then made more complicated by the introduction of ppgtt, whereby ACTHD then pointed into the address space of the context and so also needed to be taken into account. This is a fairly robust approach (though the implementation is a little fragile and depends upon the per-generation setup, registers and parameters). However, due to the requirements for hangstats, we needed a robust method for associating batches with a particular request and having that we can rely upon it for finding the associated batch object for error capture. If the batch buffer tracking is not robust enough, that should become apparent quite quickly through an erroneous error capture. That should also help to make sure that the runtime reporting to userspace is robust. It also means that we then report the oldest incomplete batch on each ring, which can be useful for determining the state of userspace at the time of a hang. v2: Use i915_gem_find_active_request (Mika) v3: remove check for ring->get_seqno, split long lines (Ben) v4: check that context is available (Chris) checkpatch warnings fixed Signed-off-by: Chris Wilson (v1) Signed-off-by: Mika Kuoppala (v3) Cc: Ben Widawsky Reviewed-by: Ben Widawsky (v3) Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 3 ++ drivers/gpu/drm/i915/i915_gem.c | 10 ++-- drivers/gpu/drm/i915/i915_gpu_error.c | 72 +++++---------------------- 3 files changed, 21 insertions(+), 64 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 3fe5a35f98a76..fe4427be2e03e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2161,6 +2161,9 @@ i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj) } } +struct drm_i915_gem_request * +i915_gem_find_active_request(struct intel_ring_buffer *ring); + bool i915_gem_retire_requests(struct drm_device *dev); void i915_gem_retire_requests_ring(struct intel_ring_buffer *ring); int __must_check i915_gem_check_wedge(struct i915_gpu_error *error, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b41ead6339637..c5a182be2eb05 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2298,11 +2298,13 @@ static void i915_gem_free_request(struct drm_i915_gem_request *request) kfree(request); } -static struct drm_i915_gem_request * -i915_gem_find_first_non_complete(struct intel_ring_buffer *ring) +struct drm_i915_gem_request * +i915_gem_find_active_request(struct intel_ring_buffer *ring) { struct drm_i915_gem_request *request; - const u32 completed_seqno = ring->get_seqno(ring, false); + u32 completed_seqno; + + completed_seqno = ring->get_seqno(ring, false); list_for_each_entry(request, &ring->request_list, list) { if (i915_seqno_passed(completed_seqno, request->seqno)) @@ -2320,7 +2322,7 @@ static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv, struct drm_i915_gem_request *request; bool ring_hung; - request = i915_gem_find_first_non_complete(ring); + request = i915_gem_find_active_request(ring); if (request == NULL) return; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index dc47bb9742d2c..eed1b34eaf477 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -713,46 +713,14 @@ static void i915_gem_record_fences(struct drm_device *dev, } } -/* This assumes all batchbuffers are executed from the PPGTT. It might have to - * change in the future. */ -static bool is_active_vm(struct i915_address_space *vm, - struct intel_ring_buffer *ring) -{ - struct drm_device *dev = vm->dev; - struct drm_i915_private *dev_priv = dev->dev_private; - struct i915_hw_ppgtt *ppgtt; - - if (INTEL_INFO(dev)->gen < 7) - return i915_is_ggtt(vm); - - /* FIXME: This ignores that the global gtt vm is also on this list. */ - ppgtt = container_of(vm, struct i915_hw_ppgtt, base); - - if (INTEL_INFO(dev)->gen >= 8) { - u64 pdp0 = (u64)I915_READ(GEN8_RING_PDP_UDW(ring, 0)) << 32; - pdp0 |= I915_READ(GEN8_RING_PDP_LDW(ring, 0)); - return pdp0 == ppgtt->pd_dma_addr[0]; - } else { - u32 pp_db; - pp_db = I915_READ(RING_PP_DIR_BASE(ring)); - return (pp_db >> 10) == ppgtt->pd_offset; - } -} - static struct drm_i915_error_object * i915_error_first_batchbuffer(struct drm_i915_private *dev_priv, struct intel_ring_buffer *ring) { - struct i915_address_space *vm; - struct i915_vma *vma; - struct drm_i915_gem_object *obj; - bool found_active = false; - u32 seqno; - - if (!ring->get_seqno) - return NULL; + struct drm_i915_gem_request *request; if (HAS_BROKEN_CS_TLB(dev_priv->dev)) { + struct drm_i915_gem_object *obj; u32 acthd = I915_READ(ACTHD); if (WARN_ON(ring->id != RCS)) @@ -765,33 +733,17 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv, return i915_error_ggtt_object_create(dev_priv, obj); } - seqno = ring->get_seqno(ring, false); - list_for_each_entry(vm, &dev_priv->vm_list, global_link) { - if (!is_active_vm(vm, ring)) - continue; - - found_active = true; - - list_for_each_entry(vma, &vm->active_list, mm_list) { - obj = vma->obj; - if (obj->ring != ring) - continue; - - if (i915_seqno_passed(seqno, obj->last_read_seqno)) - continue; - - if ((obj->base.read_domains & I915_GEM_DOMAIN_COMMAND) == 0) - continue; - - /* We need to copy these to an anonymous buffer as the simplest - * method to avoid being overwritten by userspace. - */ - return i915_error_object_create(dev_priv, obj, vm); - } - } + request = i915_gem_find_active_request(ring); + if (request == NULL) + return NULL; - WARN_ON(!found_active); - return NULL; + /* We need to copy these to an anonymous buffer as the simplest + * method to avoid being overwritten by userspace. + */ + return i915_error_object_create(dev_priv, request->batch_obj, + request->ctx ? + request->ctx->vm : + &dev_priv->gtt.base); } static void i915_record_ring_state(struct drm_device *dev, From ab0e7ff9f2d0bfe139a2ed5bb6a36f8cbd4e0886 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 25 Feb 2014 17:11:24 +0200 Subject: [PATCH 057/107] drm/i915: Record pid/comm of hanging task After finding the guilty batch and request, we can use it to find the process that submitted the batch and then add the culprit into the error state. This is a slightly different approach from Ben's in that instead of adding the extra information into the struct i915_hw_context, we use the information already captured in struct drm_file which is then referenced from the request. v2: Also capture the workaround buffer for gen2, so that we can compare its contents against the intended batch for the active request. v3: Rebase (Mika) v4: Check for null context (Chris) checkpatch warnings fixed Link: http://lists.freedesktop.org/archives/intel-gfx/2013-August/032280.html Signed-off-by: Chris Wilson (v2) Signed-off-by: Mika Kuoppala (v4) Acked-by: Ben Widawsky Cc: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 6 +- drivers/gpu/drm/i915/i915_gem.c | 1 + drivers/gpu/drm/i915/i915_gpu_error.c | 136 +++++++++++++++----------- 3 files changed, 86 insertions(+), 57 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index fe4427be2e03e..826fcaef25c1b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -360,7 +360,7 @@ struct drm_i915_error_state { int page_count; u32 gtt_offset; u32 *pages[0]; - } *ringbuffer, *batchbuffer, *ctx, *hws_page; + } *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page; struct drm_i915_error_request { long jiffies; @@ -375,6 +375,9 @@ struct drm_i915_error_state { u32 pp_dir_base; }; } vm_info; + + pid_t pid; + char comm[TASK_COMM_LEN]; } ring[I915_NUM_RINGS]; struct drm_i915_error_buffer { u32 size; @@ -1797,6 +1800,7 @@ struct drm_i915_gem_request { struct drm_i915_file_private { struct drm_i915_private *dev_priv; + struct drm_file *file; struct { spinlock_t lock; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c5a182be2eb05..6e17b45db8507 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4857,6 +4857,7 @@ int i915_gem_open(struct drm_device *dev, struct drm_file *file) file->driver_priv = file_priv; file_priv->dev_priv = dev->dev_private; + file_priv->file = file; spin_lock_init(&file_priv->mm.lock); INIT_LIST_HEAD(&file_priv->mm.request_list); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index eed1b34eaf477..8b02498ee9636 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -301,13 +301,28 @@ void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...) va_end(args); } +static void print_error_obj(struct drm_i915_error_state_buf *m, + struct drm_i915_error_object *obj) +{ + int page, offset, elt; + + for (page = offset = 0; page < obj->page_count; page++) { + for (elt = 0; elt < PAGE_SIZE/4; elt++) { + err_printf(m, "%08x : %08x\n", offset, + obj->pages[page][elt]); + offset += 4; + } + } +} + int i915_error_state_to_str(struct drm_i915_error_state_buf *m, const struct i915_error_state_file_priv *error_priv) { struct drm_device *dev = error_priv->dev; drm_i915_private_t *dev_priv = dev->dev_private; struct drm_i915_error_state *error = error_priv->error; - int i, j, page, offset, elt; + int i, j, offset, elt; + int max_hangcheck_score; if (!error) { err_printf(m, "no error state collected\n"); @@ -317,6 +332,20 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, err_printf(m, "Time: %ld s %ld us\n", error->time.tv_sec, error->time.tv_usec); err_printf(m, "Kernel: " UTS_RELEASE "\n"); + max_hangcheck_score = 0; + for (i = 0; i < ARRAY_SIZE(error->ring); i++) { + if (error->ring[i].hangcheck_score > max_hangcheck_score) + max_hangcheck_score = error->ring[i].hangcheck_score; + } + for (i = 0; i < ARRAY_SIZE(error->ring); i++) { + if (error->ring[i].hangcheck_score == max_hangcheck_score && + error->ring[i].pid != -1) { + err_printf(m, "Active process (on ring %s): %s [%d]\n", + ring_str(i), + error->ring[i].comm, + error->ring[i].pid); + } + } err_printf(m, "PCI ID: 0x%04x\n", dev->pdev->device); err_printf(m, "EIR: 0x%08x\n", error->eir); err_printf(m, "IER: 0x%08x\n", error->ier); @@ -359,18 +388,23 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, for (i = 0; i < ARRAY_SIZE(error->ring); i++) { struct drm_i915_error_object *obj; - if ((obj = error->ring[i].batchbuffer)) { - err_printf(m, "%s --- gtt_offset = 0x%08x\n", - dev_priv->ring[i].name, + obj = error->ring[i].batchbuffer; + if (obj) { + err_puts(m, dev_priv->ring[i].name); + if (error->ring[i].pid != -1) + err_printf(m, " (submitted by %s [%d])", + error->ring[i].comm, + error->ring[i].pid); + err_printf(m, " --- gtt_offset = 0x%08x\n", obj->gtt_offset); - offset = 0; - for (page = 0; page < obj->page_count; page++) { - for (elt = 0; elt < PAGE_SIZE/4; elt++) { - err_printf(m, "%08x : %08x\n", offset, - obj->pages[page][elt]); - offset += 4; - } - } + print_error_obj(m, obj); + } + + obj = error->ring[i].wa_batchbuffer; + if (obj) { + err_printf(m, "%s (w/a) --- gtt_offset = 0x%08x\n", + dev_priv->ring[i].name, obj->gtt_offset); + print_error_obj(m, obj); } if (error->ring[i].num_requests) { @@ -389,15 +423,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, err_printf(m, "%s --- ringbuffer = 0x%08x\n", dev_priv->ring[i].name, obj->gtt_offset); - offset = 0; - for (page = 0; page < obj->page_count; page++) { - for (elt = 0; elt < PAGE_SIZE/4; elt++) { - err_printf(m, "%08x : %08x\n", - offset, - obj->pages[page][elt]); - offset += 4; - } - } + print_error_obj(m, obj); } if ((obj = error->ring[i].hws_page)) { @@ -713,39 +739,6 @@ static void i915_gem_record_fences(struct drm_device *dev, } } -static struct drm_i915_error_object * -i915_error_first_batchbuffer(struct drm_i915_private *dev_priv, - struct intel_ring_buffer *ring) -{ - struct drm_i915_gem_request *request; - - if (HAS_BROKEN_CS_TLB(dev_priv->dev)) { - struct drm_i915_gem_object *obj; - u32 acthd = I915_READ(ACTHD); - - if (WARN_ON(ring->id != RCS)) - return NULL; - - obj = ring->scratch.obj; - if (obj != NULL && - acthd >= i915_gem_obj_ggtt_offset(obj) && - acthd < i915_gem_obj_ggtt_offset(obj) + obj->base.size) - return i915_error_ggtt_object_create(dev_priv, obj); - } - - request = i915_gem_find_active_request(ring); - if (request == NULL) - return NULL; - - /* We need to copy these to an anonymous buffer as the simplest - * method to avoid being overwritten by userspace. - */ - return i915_error_object_create(dev_priv, request->batch_obj, - request->ctx ? - request->ctx->vm : - &dev_priv->gtt.base); -} - static void i915_record_ring_state(struct drm_device *dev, struct intel_ring_buffer *ring, struct drm_i915_error_ring *ering) @@ -894,8 +887,39 @@ static void i915_gem_record_rings(struct drm_device *dev, i915_record_ring_state(dev, ring, &error->ring[i]); - error->ring[i].batchbuffer = - i915_error_first_batchbuffer(dev_priv, ring); + error->ring[i].pid = -1; + request = i915_gem_find_active_request(ring); + if (request) { + /* We need to copy these to an anonymous buffer + * as the simplest method to avoid being overwritten + * by userspace. + */ + error->ring[i].batchbuffer = + i915_error_object_create(dev_priv, + request->batch_obj, + request->ctx ? + request->ctx->vm : + &dev_priv->gtt.base); + + if (HAS_BROKEN_CS_TLB(dev_priv->dev) && + ring->scratch.obj) + error->ring[i].wa_batchbuffer = + i915_error_ggtt_object_create(dev_priv, + ring->scratch.obj); + + if (request->file_priv) { + struct task_struct *task; + + rcu_read_lock(); + task = pid_task(request->file_priv->file->pid, + PIDTYPE_PID); + if (task) { + strcpy(error->ring[i].comm, task->comm); + error->ring[i].pid = task->pid; + } + rcu_read_unlock(); + } + } error->ring[i].ringbuffer = i915_error_ggtt_object_create(dev_priv, ring->obj); From cb38300215dc24886347bfc6400ccfed806dac21 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 25 Feb 2014 17:11:25 +0200 Subject: [PATCH 058/107] drm/i915: Add error code into error state commit 011cf577b2531dfbd2254bd9ec147ad71471abaf Author: Ben Widawsky Date: Tue Feb 4 12:18:55 2014 +0000 drm/i915: Generate a hang error code added error code debug into dmesg. Store this also with error state to make matching dmesg logs and error states easier. As we need to have full ring state for error code generation, do full capture always, print hang message into log and then decide if we need to keep the error state. Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/i915_gpu_error.c | 61 +++++++++++++++++---------- 2 files changed, 40 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 826fcaef25c1b..7db735c1e7645 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -303,6 +303,8 @@ struct drm_i915_error_state { struct kref ref; struct timeval time; + char error_msg[128]; + /* Generic register state */ u32 eir; u32 pgtbl_er; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 8b02498ee9636..75dd0562bb4a3 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -329,6 +329,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, goto out; } + err_printf(m, "%s\n", error->error_msg); err_printf(m, "Time: %ld s %ld us\n", error->time.tv_sec, error->time.tv_usec); err_printf(m, "Kernel: " UTS_RELEASE "\n"); @@ -689,7 +690,8 @@ static u32 capture_pinned_bo(struct drm_i915_error_buffer *err, * It's only a small step better than a random number in its current form. */ static uint32_t i915_error_generate_code(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error) + struct drm_i915_error_state *error, + int *ring_id) { uint32_t error_code = 0; int i; @@ -699,9 +701,14 @@ static uint32_t i915_error_generate_code(struct drm_i915_private *dev_priv, * synchronization commands which almost always appear in the case * strictly a client bug. Use instdone to differentiate those some. */ - for (i = 0; i < I915_NUM_RINGS; i++) - if (error->ring[i].hangcheck_action == HANGCHECK_HUNG) + for (i = 0; i < I915_NUM_RINGS; i++) { + if (error->ring[i].hangcheck_action == HANGCHECK_HUNG) { + if (ring_id) + *ring_id = i; + return error->ring[i].ipehr ^ error->ring[i].instdone; + } + } return error_code; } @@ -1086,6 +1093,19 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv, i915_get_extra_instdone(dev, error->extra_instdone); } +static void i915_error_capture_msg(struct drm_device *dev, + struct drm_i915_error_state *error) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + u32 ecode; + int ring_id = -1; + + ecode = i915_error_generate_code(dev_priv, error, &ring_id); + + scnprintf(error->error_msg, sizeof(error->error_msg), + "GPU HANG: ecode %d:0x%08x", ring_id, ecode); +} + /** * i915_capture_error_state - capture an error record for later analysis * @dev: drm device @@ -1101,13 +1121,6 @@ void i915_capture_error_state(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_error_state *error; unsigned long flags; - uint32_t ecode; - - spin_lock_irqsave(&dev_priv->gpu_error.lock, flags); - error = dev_priv->gpu_error.first_error; - spin_unlock_irqrestore(&dev_priv->gpu_error.lock, flags); - if (error) - return; /* Account for pipe specific data like PIPE*STAT */ error = kzalloc(sizeof(*error), GFP_ATOMIC); @@ -1116,30 +1129,21 @@ void i915_capture_error_state(struct drm_device *dev) return; } - DRM_INFO("GPU crash dump saved to /sys/class/drm/card%d/error\n", - dev->primary->index); kref_init(&error->ref); i915_capture_reg_state(dev_priv, error); i915_gem_capture_buffers(dev_priv, error); i915_gem_record_fences(dev, error); i915_gem_record_rings(dev, error); - ecode = i915_error_generate_code(dev_priv, error); - - if (!warned) { - DRM_INFO("GPU HANG [%x]\n", ecode); - DRM_INFO("GPU hangs can indicate a bug anywhere in the entire gfx stack, including userspace.\n"); - DRM_INFO("Please file a _new_ bug report on bugs.freedesktop.org against DRI -> DRM/Intel\n"); - DRM_INFO("drm/i915 developers can then reassign to the right component if it's not a kernel issue.\n"); - DRM_INFO("The gpu crash dump is required to analyze gpu hangs, so please always attach it.\n"); - warned = true; - } do_gettimeofday(&error->time); error->overlay = intel_overlay_capture_error_state(dev); error->display = intel_display_capture_error_state(dev); + i915_error_capture_msg(dev, error); + DRM_INFO("%s\n", error->error_msg); + spin_lock_irqsave(&dev_priv->gpu_error.lock, flags); if (dev_priv->gpu_error.first_error == NULL) { dev_priv->gpu_error.first_error = error; @@ -1147,8 +1151,19 @@ void i915_capture_error_state(struct drm_device *dev) } spin_unlock_irqrestore(&dev_priv->gpu_error.lock, flags); - if (error) + if (error) { i915_error_state_free(&error->ref); + return; + } + + if (!warned) { + DRM_INFO("GPU hangs can indicate a bug anywhere in the entire gfx stack, including userspace.\n"); + DRM_INFO("Please file a _new_ bug report on bugs.freedesktop.org against DRI -> DRM/Intel\n"); + DRM_INFO("drm/i915 developers can then reassign to the right component if it's not a kernel issue.\n"); + DRM_INFO("The gpu crash dump is required to analyze gpu hangs, so please always attach it.\n"); + DRM_INFO("GPU crash dump saved to /sys/class/drm/card%d/error\n", dev->primary->index); + warned = true; + } } void i915_error_state_get(struct drm_device *dev, From 581744626d89930a03f307558182896a4f51173c Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 25 Feb 2014 17:11:26 +0200 Subject: [PATCH 059/107] drm/i915: Add reason for capture in error state We capture error state not only when the GPU hangs but also on other situations as in interrupt errors and in situations where we can kick things forward without GPU reset. There will be log entry on most of these cases. But as error state capture might be only thing we have, if dmesg was not captured. Or as in GEN4 case, interrupt error can trigger error state capture without log entry, the exact reason why capture was made is hard to decipher. v2: Split out the the error code stuff to separate patch (Ben) References: https://bugs.freedesktop.org/show_bug.cgi?id=74193 Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 5 ++- drivers/gpu/drm/i915/i915_drv.h | 7 ++-- drivers/gpu/drm/i915/i915_gpu_error.c | 27 ++++++++++++---- drivers/gpu/drm/i915/i915_irq.c | 46 +++++++++++++++++---------- 4 files changed, 58 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index aef779b4193d0..43f30746c8dcf 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3190,9 +3190,8 @@ i915_wedged_set(void *data, u64 val) { struct drm_device *dev = data; - DRM_INFO("Manually setting wedged to %llu\n", val); - i915_handle_error(dev, val); - + i915_handle_error(dev, val, + "Manually setting wedged to %llu", val); return 0; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7db735c1e7645..2b0da9592fe88 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2008,7 +2008,9 @@ extern void intel_console_resume(struct work_struct *work); /* i915_irq.c */ void i915_queue_hangcheck(struct drm_device *dev); -void i915_handle_error(struct drm_device *dev, bool wedged); +__printf(3, 4) +void i915_handle_error(struct drm_device *dev, bool wedged, + const char *fmt, ...); void gen6_set_pm_mask(struct drm_i915_private *dev_priv, u32 pm_iir, int new_delay); @@ -2449,7 +2451,8 @@ static inline void i915_error_state_buf_release( { kfree(eb->buf); } -void i915_capture_error_state(struct drm_device *dev); +void i915_capture_error_state(struct drm_device *dev, bool wedge, + const char *error_msg); void i915_error_state_get(struct drm_device *dev, struct i915_error_state_file_priv *error_priv); void i915_error_state_put(struct i915_error_state_file_priv *error_priv); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 75dd0562bb4a3..7b2afa00bccbf 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1094,16 +1094,30 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv, } static void i915_error_capture_msg(struct drm_device *dev, - struct drm_i915_error_state *error) + struct drm_i915_error_state *error, + bool wedged, + const char *error_msg) { struct drm_i915_private *dev_priv = dev->dev_private; u32 ecode; - int ring_id = -1; + int ring_id = -1, len; ecode = i915_error_generate_code(dev_priv, error, &ring_id); - scnprintf(error->error_msg, sizeof(error->error_msg), - "GPU HANG: ecode %d:0x%08x", ring_id, ecode); + len = scnprintf(error->error_msg, sizeof(error->error_msg), + "GPU HANG: ecode %d:0x%08x", ring_id, ecode); + + if (ring_id != -1 && error->ring[ring_id].pid != -1) + len += scnprintf(error->error_msg + len, + sizeof(error->error_msg) - len, + ", in %s [%d]", + error->ring[ring_id].comm, + error->ring[ring_id].pid); + + scnprintf(error->error_msg + len, sizeof(error->error_msg) - len, + ", reason: %s, action: %s", + error_msg, + wedged ? "reset" : "continue"); } /** @@ -1115,7 +1129,8 @@ static void i915_error_capture_msg(struct drm_device *dev, * out a structure which becomes available in debugfs for user level tools * to pick up. */ -void i915_capture_error_state(struct drm_device *dev) +void i915_capture_error_state(struct drm_device *dev, bool wedged, + const char *error_msg) { static bool warned; struct drm_i915_private *dev_priv = dev->dev_private; @@ -1141,7 +1156,7 @@ void i915_capture_error_state(struct drm_device *dev) error->overlay = intel_overlay_capture_error_state(dev); error->display = intel_display_capture_error_state(dev); - i915_error_capture_msg(dev, error); + i915_error_capture_msg(dev, error, wedged, error_msg); DRM_INFO("%s\n", error->error_msg); spin_lock_irqsave(&dev_priv->gpu_error.lock, flags); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 484415bb34974..3e8359eb8b0fa 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1297,8 +1297,8 @@ static void snb_gt_irq_handler(struct drm_device *dev, if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT | GT_BSD_CS_ERROR_INTERRUPT | GT_RENDER_CS_MASTER_ERROR_INTERRUPT)) { - DRM_ERROR("GT error interrupt 0x%08x\n", gt_iir); - i915_handle_error(dev, false); + i915_handle_error(dev, false, "GT error interrupt 0x%08x", + gt_iir); } if (gt_iir & GT_PARITY_ERROR(dev)) @@ -1545,8 +1545,9 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir) notify_ring(dev_priv->dev, &dev_priv->ring[VECS]); if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT) { - DRM_ERROR("VEBOX CS error interrupt 0x%08x\n", pm_iir); - i915_handle_error(dev_priv->dev, false); + i915_handle_error(dev_priv->dev, false, + "VEBOX CS error interrupt 0x%08x", + pm_iir); } } } @@ -2278,11 +2279,18 @@ static void i915_report_and_clear_eir(struct drm_device *dev) * so userspace knows something bad happened (should trigger collection * of a ring dump etc.). */ -void i915_handle_error(struct drm_device *dev, bool wedged) +void i915_handle_error(struct drm_device *dev, bool wedged, + const char *fmt, ...) { struct drm_i915_private *dev_priv = dev->dev_private; + va_list args; + char error_msg[80]; - i915_capture_error_state(dev); + va_start(args, fmt); + vscnprintf(error_msg, sizeof(error_msg), fmt, args); + va_end(args); + + i915_capture_error_state(dev, wedged, error_msg); i915_report_and_clear_eir(dev); if (wedged) { @@ -2585,9 +2593,9 @@ ring_stuck(struct intel_ring_buffer *ring, u32 acthd) */ tmp = I915_READ_CTL(ring); if (tmp & RING_WAIT) { - DRM_ERROR("Kicking stuck wait on %s\n", - ring->name); - i915_handle_error(dev, false); + i915_handle_error(dev, false, + "Kicking stuck wait on %s", + ring->name); I915_WRITE_CTL(ring, tmp); return HANGCHECK_KICK; } @@ -2597,9 +2605,9 @@ ring_stuck(struct intel_ring_buffer *ring, u32 acthd) default: return HANGCHECK_HUNG; case 1: - DRM_ERROR("Kicking stuck semaphore on %s\n", - ring->name); - i915_handle_error(dev, false); + i915_handle_error(dev, false, + "Kicking stuck semaphore on %s", + ring->name); I915_WRITE_CTL(ring, tmp); return HANGCHECK_KICK; case 0: @@ -2721,7 +2729,7 @@ static void i915_hangcheck_elapsed(unsigned long data) } if (rings_hung) - return i915_handle_error(dev, true); + return i915_handle_error(dev, true, "Ring hung"); if (busy_count) /* Reset timer case chip hangs without another request @@ -3338,7 +3346,9 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg) */ spin_lock_irqsave(&dev_priv->irq_lock, irqflags); if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT) - i915_handle_error(dev, false); + i915_handle_error(dev, false, + "Command parser error, iir 0x%08x", + iir); for_each_pipe(pipe) { int reg = PIPESTAT(pipe); @@ -3520,7 +3530,9 @@ static irqreturn_t i915_irq_handler(int irq, void *arg) */ spin_lock_irqsave(&dev_priv->irq_lock, irqflags); if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT) - i915_handle_error(dev, false); + i915_handle_error(dev, false, + "Command parser error, iir 0x%08x", + iir); for_each_pipe(pipe) { int reg = PIPESTAT(pipe); @@ -3757,7 +3769,9 @@ static irqreturn_t i965_irq_handler(int irq, void *arg) */ spin_lock_irqsave(&dev_priv->irq_lock, irqflags); if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT) - i915_handle_error(dev, false); + i915_handle_error(dev, false, + "Command parser error, iir 0x%08x", + iir); for_each_pipe(pipe) { int reg = PIPESTAT(pipe); From 48b031e30d46b11fdf7a8075f02f5f9eef728f4d Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 25 Feb 2014 17:11:27 +0200 Subject: [PATCH 060/107] drm/i915: Add reset count to error state By default we keep only the error state from first hang. However some sneaky user might have cleared the first error state and we assume mistakenly that it is from first hang. As sometimes this matters, it is better to explicitly store the reset count. Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gpu_error.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2b0da9592fe88..a6429cc93d1a1 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -304,6 +304,7 @@ struct drm_i915_error_state { struct timeval time; char error_msg[128]; + u32 reset_count; /* Generic register state */ u32 eir; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 7b2afa00bccbf..353f0770da8f7 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -347,6 +347,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, error->ring[i].pid); } } + err_printf(m, "Reset count: %u\n", error->reset_count); err_printf(m, "PCI ID: 0x%04x\n", dev->pdev->device); err_printf(m, "EIR: 0x%08x\n", error->eir); err_printf(m, "IER: 0x%08x\n", error->ier); @@ -1120,6 +1121,12 @@ static void i915_error_capture_msg(struct drm_device *dev, wedged ? "reset" : "continue"); } +static void i915_capture_gen_state(struct drm_i915_private *dev_priv, + struct drm_i915_error_state *error) +{ + error->reset_count = i915_reset_count(&dev_priv->gpu_error); +} + /** * i915_capture_error_state - capture an error record for later analysis * @dev: drm device @@ -1146,6 +1153,7 @@ void i915_capture_error_state(struct drm_device *dev, bool wedged, kref_init(&error->ref); + i915_capture_gen_state(dev_priv, error); i915_capture_reg_state(dev_priv, error); i915_gem_capture_buffers(dev_priv, error); i915_gem_record_fences(dev, error); From 62d5d69b49b6fea9905e36e67cc6c4fc5a17d75f Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 25 Feb 2014 17:11:28 +0200 Subject: [PATCH 061/107] drm/i915: Add suspend count to error state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For example if we get bug reports with similar error states and suspend count is always 1, that might lead the Sherlocks to right general direction. Suggested-by: Ville Syrjälä Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.c | 2 ++ drivers/gpu/drm/i915/i915_drv.h | 3 +++ drivers/gpu/drm/i915/i915_gpu_error.c | 2 ++ 3 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 70a4c9bb7b809..a50292c0423fb 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -479,6 +479,8 @@ static int i915_drm_freeze(struct drm_device *dev) intel_fbdev_set_suspend(dev, FBINFO_STATE_SUSPENDED); console_unlock(); + dev_priv->suspend_count++; + return 0; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a6429cc93d1a1..e33f6a7639602 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -305,6 +305,7 @@ struct drm_i915_error_state { char error_msg[128]; u32 reset_count; + u32 suspend_count; /* Generic register state */ u32 eir; @@ -1606,6 +1607,8 @@ typedef struct drm_i915_private { struct i915_dri1_state dri1; /* Old ums support infrastructure, same warning applies. */ struct i915_ums_state ums; + + u32 suspend_count; } drm_i915_private_t; static inline struct drm_i915_private *to_i915(const struct drm_device *dev) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 353f0770da8f7..ce2dd608968c9 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -348,6 +348,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, } } err_printf(m, "Reset count: %u\n", error->reset_count); + err_printf(m, "Suspend count: %u\n", error->suspend_count); err_printf(m, "PCI ID: 0x%04x\n", dev->pdev->device); err_printf(m, "EIR: 0x%08x\n", error->eir); err_printf(m, "IER: 0x%08x\n", error->ier); @@ -1125,6 +1126,7 @@ static void i915_capture_gen_state(struct drm_i915_private *dev_priv, struct drm_i915_error_state *error) { error->reset_count = i915_reset_count(&dev_priv->gpu_error); + error->suspend_count = dev_priv->suspend_count; } /** From c8966e1058e1e8ae2eec4211157847032829697a Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 26 Feb 2014 23:59:30 -0800 Subject: [PATCH 062/107] drm/i915: Add a partial instruction shootdown workaround on Broadwell. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I believe this will be necessary on production hardware. Signed-off-by: Kenneth Graunke Reviewed-by: Ville Syrjälä Reviewed-by: Ben Widawsky [danvet: Fix whitespace fail spotted by checkpatch. Also add missing :bdw w/a tag that Ville spotted.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 3 +++ drivers/gpu/drm/i915/intel_pm.c | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index e3130352dfaa6..d575baf52d548 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -5048,6 +5048,9 @@ #define GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE (1<<10) #define GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE (1<<3) +#define GEN8_ROW_CHICKEN 0xe4f0 +#define PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE (1<<8) + #define GEN7_ROW_CHICKEN2 0xe4f4 #define GEN7_ROW_CHICKEN2_GT2 0xf4f4 #define DOP_CLOCK_GATING_DISABLE (1<<0) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 2ded0f6d543be..f21c9f3ee6430 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4801,6 +4801,10 @@ static void gen8_init_clock_gating(struct drm_device *dev) /* FIXME(BDW): Check all the w/a, some might only apply to * pre-production hw. */ + /* WaDisablePartialInstShootdown:bdw */ + I915_WRITE(GEN8_ROW_CHICKEN, + _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE)); + /* * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for * pre-production hardware From 1411e6a57a1836ba8a3d4f17c8733b2fbaf0f005 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 26 Feb 2014 23:59:31 -0800 Subject: [PATCH 063/107] drm/i915: Add thread stall DOP clock gating workaround on Broadwell. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ben and I believe this will be necessary on production hardware. Signed-off-by: Kenneth Graunke [danvet: Shuffle lines to group all ROW_CHICKEN writes and add a cautious comment that this might not be needed on production hw.] Reviewed-by: Ville Syrjälä Reviewed-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index d575baf52d548..aa8390978e636 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -5050,6 +5050,7 @@ #define GEN8_ROW_CHICKEN 0xe4f0 #define PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE (1<<8) +#define STALL_DOP_GATING_DISABLE (1<<5) #define GEN7_ROW_CHICKEN2 0xe4f4 #define GEN7_ROW_CHICKEN2_GT2 0xf4f4 diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index f21c9f3ee6430..bb2ca355c1b05 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4805,6 +4805,11 @@ static void gen8_init_clock_gating(struct drm_device *dev) I915_WRITE(GEN8_ROW_CHICKEN, _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE)); + /* WaDisableThreadStallDopClockGating:bdw */ + /* FIXME: Unclear whether we really need this on production bdw. */ + I915_WRITE(GEN8_ROW_CHICKEN, + _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE)); + /* * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for * pre-production hardware From c923facd535b97972b5bb7d3df4fcafd61a63a5e Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 5 Mar 2014 14:17:28 +0200 Subject: [PATCH 064/107] drm/i915: don't flood the logs about bdw semaphores BDW is no longer flagged as preliminary hw, but without i915.preliminary_hw_support module param set the logs are filled with WARNs about it. Just make semaphores off the BDW per-chip default for now. CC: Ben Widawsky Reported-by: Sebastien Dufour Signed-off-by: Jani Nikula Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index a50292c0423fb..6ac91fcdb4f8b 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -403,15 +403,13 @@ bool i915_semaphore_is_enabled(struct drm_device *dev) if (INTEL_INFO(dev)->gen < 6) return false; - /* Until we get further testing... */ - if (IS_GEN8(dev)) { - WARN_ON(!i915.preliminary_hw_support); - return false; - } - if (i915.semaphores >= 0) return i915.semaphores; + /* Until we get further testing... */ + if (IS_GEN8(dev)) + return false; + #ifdef CONFIG_INTEL_IOMMU /* Enable semaphores on SNB when IO remapping is off */ if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) From 0a089e3355d77f758e46db54a0a81d4b58a28cc3 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Fri, 21 Feb 2014 17:32:00 +0200 Subject: [PATCH 065/107] drm/i915: Do forcewake reset on gen8 When we get control from BIOS there might be mt forcewake bits already set. This causes us to do double mt get without proper clear/ack sequence. Fix this by clearing mt forcewake register on init, like we do with older gens. Signed-off-by: Mika Kuoppala Reviewed-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_uncore.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 04bd9717b021f..de72415f2939f 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -310,13 +310,13 @@ static void intel_uncore_forcewake_reset(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - if (IS_VALLEYVIEW(dev)) { + if (IS_VALLEYVIEW(dev)) vlv_force_wake_reset(dev_priv); - } else if (INTEL_INFO(dev)->gen >= 6) { + else if (IS_GEN6(dev) || IS_GEN7(dev)) __gen6_gt_force_wake_reset(dev_priv); - if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) - __gen6_gt_force_wake_mt_reset(dev_priv); - } + + if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev) || IS_GEN8(dev)) + __gen6_gt_force_wake_mt_reset(dev_priv); } void intel_uncore_early_sanitize(struct drm_device *dev) From 6a68735a9d1fc8b10828f6775a363170f02a862b Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Fri, 21 Feb 2014 18:47:36 +0200 Subject: [PATCH 066/107] drm/i915: Don't access fifodbg registers on gen8 as they don't exists. v2: rename gen6_*_mt_* to gen7_*_mt_* as they never get called with gen6 (Chris) Signed-off-by: Mika Kuoppala Reviewed-by: Ben Widawsky (v1) Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_uncore.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index de72415f2939f..6ca24acd8b3ea 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -89,14 +89,14 @@ static void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv, __gen6_gt_wait_for_thread_c0(dev_priv); } -static void __gen6_gt_force_wake_mt_reset(struct drm_i915_private *dev_priv) +static void __gen7_gt_force_wake_mt_reset(struct drm_i915_private *dev_priv) { __raw_i915_write32(dev_priv, FORCEWAKE_MT, _MASKED_BIT_DISABLE(0xffff)); /* something from same cacheline, but !FORCEWAKE_MT */ __raw_posting_read(dev_priv, ECOBUS); } -static void __gen6_gt_force_wake_mt_get(struct drm_i915_private *dev_priv, +static void __gen7_gt_force_wake_mt_get(struct drm_i915_private *dev_priv, int fw_engine) { u32 forcewake_ack; @@ -142,14 +142,16 @@ static void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv, gen6_gt_check_fifodbg(dev_priv); } -static void __gen6_gt_force_wake_mt_put(struct drm_i915_private *dev_priv, +static void __gen7_gt_force_wake_mt_put(struct drm_i915_private *dev_priv, int fw_engine) { __raw_i915_write32(dev_priv, FORCEWAKE_MT, _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL)); /* something from same cacheline, but !FORCEWAKE_MT */ __raw_posting_read(dev_priv, ECOBUS); - gen6_gt_check_fifodbg(dev_priv); + + if (IS_GEN7(dev_priv->dev)) + gen6_gt_check_fifodbg(dev_priv); } static int __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv) @@ -316,7 +318,7 @@ static void intel_uncore_forcewake_reset(struct drm_device *dev) __gen6_gt_force_wake_reset(dev_priv); if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev) || IS_GEN8(dev)) - __gen6_gt_force_wake_mt_reset(dev_priv); + __gen7_gt_force_wake_mt_reset(dev_priv); } void intel_uncore_early_sanitize(struct drm_device *dev) @@ -690,8 +692,8 @@ void intel_uncore_init(struct drm_device *dev) dev_priv->uncore.funcs.force_wake_get = __vlv_force_wake_get; dev_priv->uncore.funcs.force_wake_put = __vlv_force_wake_put; } else if (IS_HASWELL(dev) || IS_GEN8(dev)) { - dev_priv->uncore.funcs.force_wake_get = __gen6_gt_force_wake_mt_get; - dev_priv->uncore.funcs.force_wake_put = __gen6_gt_force_wake_mt_put; + dev_priv->uncore.funcs.force_wake_get = __gen7_gt_force_wake_mt_get; + dev_priv->uncore.funcs.force_wake_put = __gen7_gt_force_wake_mt_put; } else if (IS_IVYBRIDGE(dev)) { u32 ecobus; @@ -705,16 +707,16 @@ void intel_uncore_init(struct drm_device *dev) * forcewake being disabled. */ mutex_lock(&dev->struct_mutex); - __gen6_gt_force_wake_mt_get(dev_priv, FORCEWAKE_ALL); + __gen7_gt_force_wake_mt_get(dev_priv, FORCEWAKE_ALL); ecobus = __raw_i915_read32(dev_priv, ECOBUS); - __gen6_gt_force_wake_mt_put(dev_priv, FORCEWAKE_ALL); + __gen7_gt_force_wake_mt_put(dev_priv, FORCEWAKE_ALL); mutex_unlock(&dev->struct_mutex); if (ecobus & FORCEWAKE_MT_ENABLE) { dev_priv->uncore.funcs.force_wake_get = - __gen6_gt_force_wake_mt_get; + __gen7_gt_force_wake_mt_get; dev_priv->uncore.funcs.force_wake_put = - __gen6_gt_force_wake_mt_put; + __gen7_gt_force_wake_mt_put; } else { DRM_INFO("No MT forcewake available on Ivybridge, this can result in issues\n"); DRM_INFO("when using vblank-synced partial screen updates.\n"); @@ -988,7 +990,10 @@ static int gen6_do_reset(struct drm_device *dev) } /* Restore fifo count */ - dev_priv->uncore.fifo_count = __raw_i915_read32(dev_priv, GTFIFOCTL) & GT_FIFO_FREE_ENTRIES_MASK; + if (IS_GEN6(dev) || IS_GEN7(dev)) + dev_priv->uncore.fifo_count = + __raw_i915_read32(dev_priv, GTFIFOCTL) & + GT_FIFO_FREE_ENTRIES_MASK; spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); return ret; From 8f7abfd82246a8d8b5bd1ad3056f3b46345b6b4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 27 Feb 2014 14:23:12 +0200 Subject: [PATCH 067/107] drm/i915: Fix DDI port_clock for VGA output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On DDI there's no PLL as such to generate the pixel clock for VGA. Instead we derive the pixel clock from the FDI link frequency. So to make .compute_config match what .get_config does, we need to set the port_clock based on the FDI link frequency. Note that we don't even check the port_clock when selecting the PLL for VGA output. We just assume SPLL at 1.35GHz is what we want, and that does match with the asumption of FDI frequency of 2.7Ghz we have in intel_fdi_link_freq(). Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=74955 Signed-off-by: Ville Syrjälä Reviewed-by: Paulo Zanoni Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_crt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 4c1230c737d5d..469071db35280 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -262,6 +262,10 @@ static bool intel_crt_compute_config(struct intel_encoder *encoder, if (HAS_PCH_LPT(dev)) pipe_config->pipe_bpp = 24; + /* FDI must always be 2.7 GHz */ + if (HAS_DDI(dev)) + pipe_config->port_clock = 135000 * 2; + return true; } From 619d4d04723346bf6ca3273668e0ba2582e8de36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 27 Feb 2014 14:23:14 +0200 Subject: [PATCH 068/107] drm/i915: Use DIV_ROUND_UP() when calculating number of required FDI lanes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we need precisely N lanes to satisfy the FDI bandwidth requirement, the code would still claim that we need N+1 lanes. Use DIV_ROUND_UP() to get a more accurate answer. Signed-off-by: Ville Syrjälä Reviewed-by: Paulo Zanoni Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 45bd176b68b58..227f6602de0ae 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -6161,7 +6161,7 @@ int ironlake_get_lanes_required(int target_clock, int link_bw, int bpp) * is 2.5%; use 5% for safety's sake. */ u32 bps = target_clock * bpp * 21 / 20; - return bps / (link_bw * 8) + 1; + return DIV_ROUND_UP(bps, link_bw * 8); } static bool ironlake_needs_fb_cb_tune(struct dpll *dpll, int factor) From 295e8bb73a4785b65db6655fbf6ad57c4177b551 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 27 Feb 2014 21:59:01 +0200 Subject: [PATCH 069/107] drm/i915: Disable semaphore wait event idle message on BDW MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to BSpec we need to always set this magic bit in ring buffer mode. Signed-off-by: Ville Syrjälä Reviewed-by: Mika Kuoppala Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 3 +++ drivers/gpu/drm/i915/intel_pm.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index aa8390978e636..d11a9ea5884d5 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -949,6 +949,9 @@ #define GEN6_BLITTER_LOCK_SHIFT 16 #define GEN6_BLITTER_FBC_NOTIFY (1<<3) +#define GEN6_RC_SLEEP_PSMI_CONTROL 0x2050 +#define GEN8_RC_SEMA_IDLE_MSG_DISABLE (1 << 12) + #define GEN6_BSD_SLEEP_PSMI_CONTROL 0x12050 #define GEN6_BSD_SLEEP_MSG_DISABLE (1 << 0) #define GEN6_BSD_SLEEP_FLUSH_DISABLE (1 << 2) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index bb2ca355c1b05..739eabc814e91 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4867,6 +4867,9 @@ static void gen8_init_clock_gating(struct drm_device *dev) */ I915_WRITE(GEN7_GT_MODE, GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); + + I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL, + _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE)); } static void haswell_init_clock_gating(struct drm_device *dev) From 4f1ca9e94057de098d65bc7477e8f89dd51609aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 27 Feb 2014 21:59:02 +0200 Subject: [PATCH 070/107] drm/i915: Implement WaDisableSDEUnitClockGating:bdw MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ville Syrjälä Reviewed-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 3 +++ drivers/gpu/drm/i915/intel_pm.c | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index d11a9ea5884d5..c913ca5c3b392 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4906,6 +4906,9 @@ #define GEN7_UCGCTL4 0x940c #define GEN7_L3BANK2X_CLOCK_GATE_DISABLE (1<<25) +#define GEN8_UCGCTL6 0x9430 +#define GEN8_SDEUNIT_CLOCK_GATE_DISABLE (1<<14) + #define GEN6_RPNSWREQ 0xA008 #define GEN6_TURBO_DISABLE (1<<31) #define GEN6_FREQUENCY(x) ((x)<<25) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 739eabc814e91..5819f5c92554c 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4870,6 +4870,10 @@ static void gen8_init_clock_gating(struct drm_device *dev) I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL, _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE)); + + /* WaDisableSDEUnitClockGating:bdw */ + I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | + GEN8_SDEUNIT_CLOCK_GATE_DISABLE); } static void haswell_init_clock_gating(struct drm_device *dev) From 8285222c487b61c48b9b955b82598544c3c06050 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 27 Feb 2014 21:59:03 +0200 Subject: [PATCH 071/107] drm/i915: We implement WaDisableAsyncFlipPerfMode:bdw MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ville Syrjälä Reviewed-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 76162acf70155..b2d4f4852c986 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -571,7 +571,7 @@ static int init_render_ring(struct intel_ring_buffer *ring) * to use MI_WAIT_FOR_EVENT within the CS. It should already be * programmed to '1' on all products. * - * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv + * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw */ if (INTEL_INFO(dev)->gen >= 6) I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE)); From 8cc87b7549969e532317077d325233779f8b96b6 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Mon, 3 Mar 2014 17:31:44 +0000 Subject: [PATCH 072/107] drm/i915: Use a pipe variable to cycle through the pipes I recently fumbled a patch because I wrote twice num_sprites[i], and it was the right thing to do in only 50% of the cases. This patch ensures I need to write num_sprites[pipe], ie it should be self-documented that it's per-pipe number of sprites without having to look at what is 'i' this time around. It's all a lame excuse, but it does make it harder to redo the same mistake. Signed-off-by: Damien Lespiau Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 227f6602de0ae..1ab9e3f37acb0 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -10981,7 +10981,8 @@ void intel_modeset_suspend_hw(struct drm_device *dev) void intel_modeset_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - int i, j, ret; + int j, ret; + enum pipe pipe; drm_mode_config_init(dev); @@ -11018,13 +11019,13 @@ void intel_modeset_init(struct drm_device *dev) INTEL_INFO(dev)->num_pipes, INTEL_INFO(dev)->num_pipes > 1 ? "s" : ""); - for_each_pipe(i) { - intel_crtc_init(dev, i); + for_each_pipe(pipe) { + intel_crtc_init(dev, pipe); for (j = 0; j < INTEL_INFO(dev)->num_sprites; j++) { - ret = intel_plane_init(dev, i, j); + ret = intel_plane_init(dev, pipe, j); if (ret) DRM_DEBUG_KMS("pipe %c sprite %c init failed: %d\n", - pipe_name(i), sprite_name(i, j), ret); + pipe_name(pipe), sprite_name(pipe, j), ret); } } From e3d51285348a6564356010465411c9a60d070a51 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Mon, 3 Mar 2014 17:31:45 +0000 Subject: [PATCH 073/107] drm/i915: Don't declare unnecessary shadowing variable 'i' is already defined in the function scope and used elsewhere. Let's use it instead. Signed-off-by: Damien Lespiau Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 43f30746c8dcf..84d39a989e210 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -602,7 +602,6 @@ static int i915_interrupt_info(struct seq_file *m, void *data) intel_runtime_pm_get(dev_priv); if (INTEL_INFO(dev)->gen >= 8) { - int i; seq_printf(m, "Master Interrupt Control:\t%08x\n", I915_READ(GEN8_MASTER_IRQ)); From 07d27e20bc4ab2c8f969df8ebd7622320a0cdd92 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Mon, 3 Mar 2014 17:31:46 +0000 Subject: [PATCH 074/107] drm/i915: Replace a few for_each_pipe(i) by for_each_pipe(pipe) Consistency throughout the code base is good and remove some room for mistakes (as explained in the "drm/i915: Use a pipe variable to cycle through the pipes" commit) So, let's replace the for_each_pipe(i) occurences by for_each_pipe(pipe) when it's reasonable and practical to do so (eg. when there isn't another pipe variable already). Suggested-by: Chris Wilson Signed-off-by: Damien Lespiau Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 14 +++++++------- drivers/gpu/drm/i915/i915_irq.c | 14 +++++++------- drivers/gpu/drm/i915/intel_pm.c | 16 ++++++++-------- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 84d39a989e210..fbcf536924e0f 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -614,16 +614,16 @@ static int i915_interrupt_info(struct seq_file *m, void *data) i, I915_READ(GEN8_GT_IER(i))); } - for_each_pipe(i) { + for_each_pipe(pipe) { seq_printf(m, "Pipe %c IMR:\t%08x\n", - pipe_name(i), - I915_READ(GEN8_DE_PIPE_IMR(i))); + pipe_name(pipe), + I915_READ(GEN8_DE_PIPE_IMR(pipe))); seq_printf(m, "Pipe %c IIR:\t%08x\n", - pipe_name(i), - I915_READ(GEN8_DE_PIPE_IIR(i))); + pipe_name(pipe), + I915_READ(GEN8_DE_PIPE_IIR(pipe))); seq_printf(m, "Pipe %c IER:\t%08x\n", - pipe_name(i), - I915_READ(GEN8_DE_PIPE_IER(i))); + pipe_name(pipe), + I915_READ(GEN8_DE_PIPE_IER(pipe))); } seq_printf(m, "Display Engine port interrupt mask:\t%08x\n", diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 3e8359eb8b0fa..939139bc70b1f 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1867,7 +1867,7 @@ static void ilk_display_irq_handler(struct drm_device *dev, u32 de_iir) static void ivb_display_irq_handler(struct drm_device *dev, u32 de_iir) { struct drm_i915_private *dev_priv = dev->dev_private; - enum pipe i; + enum pipe pipe; if (de_iir & DE_ERR_INT_IVB) ivb_err_int_handler(dev); @@ -1878,14 +1878,14 @@ static void ivb_display_irq_handler(struct drm_device *dev, u32 de_iir) if (de_iir & DE_GSE_IVB) intel_opregion_asle_intr(dev); - for_each_pipe(i) { - if (de_iir & (DE_PIPE_VBLANK_IVB(i))) - drm_handle_vblank(dev, i); + for_each_pipe(pipe) { + if (de_iir & (DE_PIPE_VBLANK_IVB(pipe))) + drm_handle_vblank(dev, pipe); /* plane/pipes map 1:1 on ilk+ */ - if (de_iir & DE_PLANE_FLIP_DONE_IVB(i)) { - intel_prepare_page_flip(dev, i); - intel_finish_page_flip_plane(dev, i); + if (de_iir & DE_PLANE_FLIP_DONE_IVB(pipe)) { + intel_prepare_page_flip(dev, pipe); + intel_finish_page_flip_plane(dev, pipe); } } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 5819f5c92554c..8df1826a6015f 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4792,7 +4792,7 @@ static void lpt_suspend_hw(struct drm_device *dev) static void gen8_init_clock_gating(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - enum pipe i; + enum pipe pipe; I915_WRITE(WM3_LP_ILK, 0); I915_WRITE(WM2_LP_ILK, 0); @@ -4837,9 +4837,9 @@ static void gen8_init_clock_gating(struct drm_device *dev) I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD); /* WaPsrDPRSUnmaskVBlankInSRD:bdw */ - for_each_pipe(i) { - I915_WRITE(CHICKEN_PIPESL_1(i), - I915_READ(CHICKEN_PIPESL_1(i) | + for_each_pipe(pipe) { + I915_WRITE(CHICKEN_PIPESL_1(pipe), + I915_READ(CHICKEN_PIPESL_1(pipe) | DPRS_MASK_VBLANK_SRD)); } @@ -5310,7 +5310,7 @@ static void hsw_power_well_post_enable(struct drm_i915_private *dev_priv) static void hsw_power_well_post_disable(struct drm_i915_private *dev_priv) { struct drm_device *dev = dev_priv->dev; - enum pipe p; + enum pipe pipe; unsigned long irqflags; /* @@ -5321,9 +5321,9 @@ static void hsw_power_well_post_disable(struct drm_i915_private *dev_priv) * FIXME: Should we do this in general in drm_vblank_post_modeset? */ spin_lock_irqsave(&dev->vbl_lock, irqflags); - for_each_pipe(p) - if (p != PIPE_A) - dev->vblank[p].last = 0; + for_each_pipe(pipe) + if (pipe != PIPE_A) + dev->vblank[pipe].last = 0; spin_unlock_irqrestore(&dev->vbl_lock, irqflags); } From 1fe477856e2237bdc26173ea203ce99ff6f1392b Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Mon, 3 Mar 2014 17:31:47 +0000 Subject: [PATCH 075/107] drm/i915: Add a for_each_sprite() macro This macro is similar to for_each_pipe() we already have. Convert the two call sites we have at the same time. Signed-off-by: Damien Lespiau Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_display.c | 16 ++++++++-------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e33f6a7639602..2f74a773f59eb 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -159,6 +159,7 @@ enum hpd_pin { I915_GEM_DOMAIN_VERTEX) #define for_each_pipe(p) for ((p) = 0; (p) < INTEL_INFO(dev)->num_pipes; (p)++) +#define for_each_sprite(p, s) for ((s) = 0; (s) < INTEL_INFO(dev)->num_sprites; (s)++) #define for_each_encoder_on_crtc(dev, __crtc, intel_encoder) \ list_for_each_entry((intel_encoder), &(dev)->mode_config.encoder_list, base.head) \ diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 1ab9e3f37acb0..91c2e3b063964 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1188,16 +1188,16 @@ static void assert_sprites_disabled(struct drm_i915_private *dev_priv, enum pipe pipe) { struct drm_device *dev = dev_priv->dev; - int reg, i; + int reg, sprite; u32 val; if (IS_VALLEYVIEW(dev)) { - for (i = 0; i < INTEL_INFO(dev)->num_sprites; i++) { - reg = SPCNTR(pipe, i); + for_each_sprite(pipe, sprite) { + reg = SPCNTR(pipe, sprite); val = I915_READ(reg); WARN((val & SP_ENABLE), "sprite %c assertion failure, should be off on pipe %c but is still active\n", - sprite_name(pipe, i), pipe_name(pipe)); + sprite_name(pipe, sprite), pipe_name(pipe)); } } else if (INTEL_INFO(dev)->gen >= 7) { reg = SPRCTL(pipe); @@ -10981,7 +10981,7 @@ void intel_modeset_suspend_hw(struct drm_device *dev) void intel_modeset_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - int j, ret; + int sprite, ret; enum pipe pipe; drm_mode_config_init(dev); @@ -11021,11 +11021,11 @@ void intel_modeset_init(struct drm_device *dev) for_each_pipe(pipe) { intel_crtc_init(dev, pipe); - for (j = 0; j < INTEL_INFO(dev)->num_sprites; j++) { - ret = intel_plane_init(dev, pipe, j); + for_each_sprite(pipe, sprite) { + ret = intel_plane_init(dev, pipe, sprite); if (ret) DRM_DEBUG_KMS("pipe %c sprite %c init failed: %d\n", - pipe_name(pipe), sprite_name(pipe, j), ret); + pipe_name(pipe), sprite_name(pipe, sprite), ret); } } From d615a16622745d8e4c1104d5ca46c058ef576b7a Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Mon, 3 Mar 2014 17:31:48 +0000 Subject: [PATCH 076/107] drm/i915: Make num_sprites a per-pipe value In the future, we need to be able to specify per-pipe number of planes/sprites. Let's start today! Signed-off-by: Damien Lespiau Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 8 ++++++-- drivers/gpu/drm/i915/i915_drv.h | 6 +++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index f8f7a59ab0761..e4d2b9f15ae29 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1480,12 +1480,16 @@ static void intel_device_info_runtime_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_device_info *info; + enum pipe pipe; info = (struct intel_device_info *)&dev_priv->info; - info->num_sprites = 1; if (IS_VALLEYVIEW(dev)) - info->num_sprites = 2; + for_each_pipe(pipe) + info->num_sprites[pipe] = 2; + else + for_each_pipe(pipe) + info->num_sprites[pipe] = 1; if (i915.disable_display) { DRM_INFO("Display disabled (module parameter)\n"); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2f74a773f59eb..788acfb9aea55 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -79,7 +79,7 @@ enum plane { }; #define plane_name(p) ((p) + 'A') -#define sprite_name(p, s) ((p) * INTEL_INFO(dev)->num_sprites + (s) + 'A') +#define sprite_name(p, s) ((p) * INTEL_INFO(dev)->num_sprites[(p)] + (s) + 'A') enum port { PORT_A = 0, @@ -159,7 +159,7 @@ enum hpd_pin { I915_GEM_DOMAIN_VERTEX) #define for_each_pipe(p) for ((p) = 0; (p) < INTEL_INFO(dev)->num_pipes; (p)++) -#define for_each_sprite(p, s) for ((s) = 0; (s) < INTEL_INFO(dev)->num_sprites; (s)++) +#define for_each_sprite(p, s) for ((s) = 0; (s) < INTEL_INFO(dev)->num_sprites[(p)]; (s)++) #define for_each_encoder_on_crtc(dev, __crtc, intel_encoder) \ list_for_each_entry((intel_encoder), &(dev)->mode_config.encoder_list, base.head) \ @@ -542,7 +542,7 @@ struct intel_uncore { struct intel_device_info { u32 display_mmio_offset; u8 num_pipes:3; - u8 num_sprites:2; + u8 num_sprites[I915_MAX_PIPES]; u8 gen; u8 ring_mask; /* Rings supported by the HW */ DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG, SEP_SEMICOLON); From b3064154dfd37deb386b1e459c54e1ca2460b3d5 Mon Sep 17 00:00:00 2001 From: Patrik Jakobsson Date: Tue, 4 Mar 2014 00:42:44 +0100 Subject: [PATCH 077/107] drm/i915: Don't just say it, actually force edp vdd This patch fixes the blank screen bug introduced in 3.14-rc1 on the MacBook Air 6,2. The comments state that we need to force edp vdd so lets put it back. The regression was introduced by the following commit: commit dff392dbd258381a6c3164f38420593f2d291e3b Author: Paulo Zanoni Date: Fri Dec 6 17:32:41 2013 -0200 drm/i915: don't touch the VDD when disabling the panel v2: Wrap intel_disable_dp() with _vdd_on and _vdd_off Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=74628 Cc: Paulo Zanoni Cc: Chris Wilson Signed-off-by: Patrik Jakobsson Acked-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_dp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 1ac4b11765c72..10154ec77b984 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1326,7 +1326,8 @@ void intel_edp_panel_off(struct intel_dp *intel_dp) pp = ironlake_get_pp_control(intel_dp); /* We need to switch off panel power _and_ force vdd, for otherwise some * panels get very unhappy and cease to work. */ - pp &= ~(POWER_TARGET_ON | PANEL_POWER_RESET | EDP_BLC_ENABLE); + pp &= ~(POWER_TARGET_ON | PANEL_POWER_RESET | EDP_FORCE_VDD | + EDP_BLC_ENABLE); pp_ctrl_reg = _pp_ctrl_reg(intel_dp); @@ -1861,9 +1862,11 @@ static void intel_disable_dp(struct intel_encoder *encoder) /* Make sure the panel is off before trying to change the mode. But also * ensure that we have vdd while we switch off the panel. */ + edp_panel_vdd_on(intel_dp); intel_edp_backlight_off(intel_dp); intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); intel_edp_panel_off(intel_dp); + edp_panel_vdd_off(intel_dp, true); /* cpu edp my only be disable _after_ the cpu pipe/plane is disabled. */ if (!(port == PORT_A || IS_VALLEYVIEW(dev))) From cb216aa844e211923a270f5c68ecbdfe738e3a3e Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Mon, 3 Mar 2014 17:42:36 +0000 Subject: [PATCH 078/107] drm/i915: Make i915_gem_retire_requests_ring() static Its last usage outside of i915_gem.c was removed in: commit 1f70999f9052f5a1b0ce1a55aff3808f2ec9fe42 Author: Chris Wilson Date: Mon Jan 27 22:43:07 2014 +0000 drm/i915: Prevent recursion by retiring requests when the ring is full Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 1 - drivers/gpu/drm/i915/i915_gem.c | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 788acfb9aea55..354b79e9dcc45 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2178,7 +2178,6 @@ struct drm_i915_gem_request * i915_gem_find_active_request(struct intel_ring_buffer *ring); bool i915_gem_retire_requests(struct drm_device *dev); -void i915_gem_retire_requests_ring(struct intel_ring_buffer *ring); int __must_check i915_gem_check_wedge(struct i915_gpu_error *error, bool interruptible); static inline bool i915_reset_in_progress(struct i915_gpu_error *error) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6e17b45db8507..18ea6bccbbf04 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -61,6 +61,7 @@ static unsigned long i915_gem_inactive_scan(struct shrinker *shrinker, static unsigned long i915_gem_purge(struct drm_i915_private *dev_priv, long target); static unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv); static void i915_gem_object_truncate(struct drm_i915_gem_object *obj); +static void i915_gem_retire_requests_ring(struct intel_ring_buffer *ring); static bool cpu_cache_is_coherent(struct drm_device *dev, enum i915_cache_level level) @@ -2414,7 +2415,7 @@ void i915_gem_reset(struct drm_device *dev) /** * This function clears the request list as sequence numbers are passed. */ -void +static void i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) { uint32_t seqno; From 9ad6ce51026204cbb2fda4c63f3544c3eb637471 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Mon, 3 Mar 2014 17:42:37 +0000 Subject: [PATCH 079/107] drm/i915: Remove unused to_gem_object() macro That macro was only ever used to convert ring->private into a gem object (hence the forceful cast). ring->private doesn't even exist anymore as it was transmogrified by Chris in: commit 0d1aacac36530fce058d7a0db3da7befd5765417 Author: Chris Wilson Date: Mon Aug 26 20:58:11 2013 +0100 drm/i915: Embed the ring->private within the struct intel_ring_buffer Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 354b79e9dcc45..dce09fcddc2c7 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1761,7 +1761,6 @@ struct drm_i915_gem_object { /** for phy allocated objects */ struct drm_i915_gem_phys_object *phys_obj; }; -#define to_gem_object(obj) (&((struct drm_i915_gem_object *)(obj))->base) #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base) From 96a6f0f1db4eb97b0ac4342228d2cea8b7e3eeba Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Mon, 3 Mar 2014 23:57:24 +0000 Subject: [PATCH 080/107] drm/i915: Fix i915_switch_context() argument name in kerneldoc While reading some code, out of boredom, stumbled on a tiny tiny fix. Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index d194f5084edfa..ce41cff843466 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -758,7 +758,7 @@ static int do_switch(struct intel_ring_buffer *ring, * i915_switch_context() - perform a GPU context switch. * @ring: ring for which we'll execute the context switch * @file_priv: file_priv associated with the context, may be NULL - * @id: context id number + * @to: the context to switch to * * The context life cycle is simple. The context refcount is incremented and * decremented by 1 and create and destroy. If the context is in use by the GPU, From 5babf0fc26ae5596c2c113702568167fb7f8cf9b Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Wed, 5 Mar 2014 18:08:18 +0200 Subject: [PATCH 081/107] drm/i915: No need to put forcewake after a reset As we now have intel_uncore_forcewake_reset() no need to do explicit put after reset. v2: rebase Signed-off-by: Mika Kuoppala Reviewed-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_uncore.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 6ca24acd8b3ea..00320fd10322d 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -952,6 +952,7 @@ static int gen6_do_reset(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; int ret; unsigned long irqflags; + u32 fw_engine = 0; /* Hold uncore.lock across reset to prevent any register access * with forcewake not set correctly @@ -971,25 +972,21 @@ static int gen6_do_reset(struct drm_device *dev) intel_uncore_forcewake_reset(dev); - /* If reset with a user forcewake, try to restore, otherwise turn it off */ + /* If reset with a user forcewake, try to restore */ if (IS_VALLEYVIEW(dev)) { if (dev_priv->uncore.fw_rendercount) - dev_priv->uncore.funcs.force_wake_get(dev_priv, FORCEWAKE_RENDER); - else - dev_priv->uncore.funcs.force_wake_put(dev_priv, FORCEWAKE_RENDER); + fw_engine |= FORCEWAKE_RENDER; if (dev_priv->uncore.fw_mediacount) - dev_priv->uncore.funcs.force_wake_get(dev_priv, FORCEWAKE_MEDIA); - else - dev_priv->uncore.funcs.force_wake_put(dev_priv, FORCEWAKE_MEDIA); + fw_engine |= FORCEWAKE_MEDIA; } else { if (dev_priv->uncore.forcewake_count) - dev_priv->uncore.funcs.force_wake_get(dev_priv, FORCEWAKE_ALL); - else - dev_priv->uncore.funcs.force_wake_put(dev_priv, FORCEWAKE_ALL); + fw_engine = FORCEWAKE_ALL; } - /* Restore fifo count */ + if (fw_engine) + dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_engine); + if (IS_GEN6(dev) || IS_GEN7(dev)) dev_priv->uncore.fifo_count = __raw_i915_read32(dev_priv, GTFIFOCTL) & From 38aecea0ccbb909d635619cba22f1891e589b434 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 3 Mar 2014 11:18:10 +0100 Subject: [PATCH 082/107] drm/i915: reverse dp link param selection, prefer fast over wide again ... it's this time of the year again. Originally we've frobbed this to fix up some regressions, but maybe our DP code improved sufficiently now that we can dare to do again what the spec recommends. This reverts commit 2514bc510d0c3aadcc5204056bb440fa36845147 Author: Jesse Barnes Date: Thu Jun 21 15:13:50 2012 -0700 drm/i915: prefer wide & slow to fast & narrow in DP configs I'm pretty sure I'll regret this patch, but otoh I expect we won't make progress here without poking the devil occasionally. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=73694 Cc: peter@colberg.org Cc: Jesse Barnes Tested-by: Itai BEN YAACOV Tested-by: David En Reported-and-Tested-by: Marcus Bergner Reviewed-by: Jani Nikula Acked-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_dp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 10154ec77b984..62e8efedce52c 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -909,8 +909,8 @@ intel_dp_compute_config(struct intel_encoder *encoder, mode_rate = intel_dp_link_required(adjusted_mode->crtc_clock, bpp); - for (clock = 0; clock <= max_clock; clock++) { - for (lane_count = 1; lane_count <= max_lane_count; lane_count <<= 1) { + for (lane_count = 1; lane_count <= max_lane_count; lane_count <<= 1) { + for (clock = 0; clock <= max_clock; clock++) { link_clock = drm_dp_bw_code_to_link_rate(bws[clock]); link_avail = intel_dp_max_data_rate(link_clock, lane_count); From c7c656226842679bcd9f39dc24441b4ff398a850 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 5 Mar 2014 13:05:45 +0200 Subject: [PATCH 083/107] drm/i915: Don't clobber CHICKEN_PIPESL_1 on BDW MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Misplaced parens cause us to totally clobber the CHICKEN_PIPESL_1 registers with 0xffffffff. Move the parens to the correct place to avoid this. In particular this caused bit 30 of said registers to be set, which caused the sprite CSC to produce incorrect results. Cc: stable@vger.kernel.org Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=72220 Signed-off-by: Ville Syrjälä Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 8df1826a6015f..2cc9de7899b12 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4839,8 +4839,8 @@ static void gen8_init_clock_gating(struct drm_device *dev) /* WaPsrDPRSUnmaskVBlankInSRD:bdw */ for_each_pipe(pipe) { I915_WRITE(CHICKEN_PIPESL_1(pipe), - I915_READ(CHICKEN_PIPESL_1(pipe) | - DPRS_MASK_VBLANK_SRD)); + I915_READ(CHICKEN_PIPESL_1(pipe)) | + DPRS_MASK_VBLANK_SRD); } /* Use Force Non-Coherent whenever executing a 3D context. This is a From 2adb6db8d9fb0f94173a4cba6e1dcb8585f1a928 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 5 Mar 2014 13:05:46 +0200 Subject: [PATCH 084/107] drm/i915: Use RMW to update chicken bits in gen7_enable_fbc() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gen7_enable_fbc() may write to some registers which we've already touched, so use RMW so that we don't undo any previous updates. Also note that we implemnt WaFbcAsynchFlipDisableFbcQueue:bdw. Signed-off-by: Ville Syrjälä Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 2cc9de7899b12..e8f2d8a7066ee 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -294,10 +294,13 @@ static void gen7_enable_fbc(struct drm_crtc *crtc) if (IS_IVYBRIDGE(dev)) { /* WaFbcAsynchFlipDisableFbcQueue:ivb */ - I915_WRITE(ILK_DISPLAY_CHICKEN1, ILK_FBCQ_DIS); + I915_WRITE(ILK_DISPLAY_CHICKEN1, + I915_READ(ILK_DISPLAY_CHICKEN1) | + ILK_FBCQ_DIS); } else { - /* WaFbcAsynchFlipDisableFbcQueue:hsw */ + /* WaFbcAsynchFlipDisableFbcQueue:hsw,bdw */ I915_WRITE(HSW_PIPE_SLICE_CHICKEN_1(intel_crtc->pipe), + I915_READ(HSW_PIPE_SLICE_CHICKEN_1(intel_crtc->pipe)) | HSW_BYPASS_FBC_QUEUE); } From 8f670bb15a69e1186098454beb1b43dc1d923a24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 5 Mar 2014 13:05:47 +0200 Subject: [PATCH 085/107] drm/i915: Unify CHICKEN_PIPESL_1 register definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have two names for the same register CHICKEN_PIPESL_1 and HSW_PIPE_SLICE_CHICKEN_1. Unify it to just one. Also rename the FBCQ disable bit to resemble the name we've given to a similar bit on earlier platforms. Signed-off-by: Ville Syrjälä Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 10 ++-------- drivers/gpu/drm/i915/intel_pm.c | 8 ++++---- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index c913ca5c3b392..04c00f39e94aa 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1129,13 +1129,6 @@ #define FBC_REND_NUKE (1<<2) #define FBC_REND_CACHE_CLEAN (1<<1) -#define _HSW_PIPE_SLICE_CHICKEN_1_A 0x420B0 -#define _HSW_PIPE_SLICE_CHICKEN_1_B 0x420B4 -#define HSW_BYPASS_FBC_QUEUE (1<<22) -#define HSW_PIPE_SLICE_CHICKEN_1(pipe) _PIPE(pipe, + \ - _HSW_PIPE_SLICE_CHICKEN_1_A, + \ - _HSW_PIPE_SLICE_CHICKEN_1_B) - /* * GPIO regs */ @@ -4148,7 +4141,8 @@ #define _CHICKEN_PIPESL_1_A 0x420b0 #define _CHICKEN_PIPESL_1_B 0x420b4 -#define DPRS_MASK_VBLANK_SRD (1 << 0) +#define HSW_FBCQ_DIS (1 << 22) +#define BDW_DPRS_MASK_VBLANK_SRD (1 << 0) #define CHICKEN_PIPESL_1(pipe) _PIPE(pipe, _CHICKEN_PIPESL_1_A, _CHICKEN_PIPESL_1_B) #define DISP_ARB_CTL 0x45000 diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index e8f2d8a7066ee..f348ad287d4d8 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -299,9 +299,9 @@ static void gen7_enable_fbc(struct drm_crtc *crtc) ILK_FBCQ_DIS); } else { /* WaFbcAsynchFlipDisableFbcQueue:hsw,bdw */ - I915_WRITE(HSW_PIPE_SLICE_CHICKEN_1(intel_crtc->pipe), - I915_READ(HSW_PIPE_SLICE_CHICKEN_1(intel_crtc->pipe)) | - HSW_BYPASS_FBC_QUEUE); + I915_WRITE(CHICKEN_PIPESL_1(intel_crtc->pipe), + I915_READ(CHICKEN_PIPESL_1(intel_crtc->pipe)) | + HSW_FBCQ_DIS); } I915_WRITE(SNB_DPFC_CTL_SA, @@ -4843,7 +4843,7 @@ static void gen8_init_clock_gating(struct drm_device *dev) for_each_pipe(pipe) { I915_WRITE(CHICKEN_PIPESL_1(pipe), I915_READ(CHICKEN_PIPESL_1(pipe)) | - DPRS_MASK_VBLANK_SRD); + BDW_DPRS_MASK_VBLANK_SRD); } /* Use Force Non-Coherent whenever executing a 3D context. This is a From 70bf407c8deb5d2e26468a99f1af19a166bb89e7 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 4 Mar 2014 19:22:51 +0200 Subject: [PATCH 086/107] drm/i915: fold in __intel_power_well_get/put functions These functions are used only by a single call site and are simple enough to just fold them in. Note that in later patches the parts folded in here are further simplified as we'll remove hsw_{disable,enable}_package_c8 and the NULL check of the power well enable/disable handlers. All this means that at the end intel_display_power_get/put() becomes more understandable as we don't need to jump between two functions when reading the code. No functional change. v2: - clarify the rational for the change (Chris) Signed-off-by: Imre Deak Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 37 ++++++++++++--------------------- 1 file changed, 13 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index f348ad287d4d8..6f231890b3954 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5366,27 +5366,6 @@ static void hsw_set_power_well(struct drm_i915_private *dev_priv, } } -static void __intel_power_well_get(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - if (!power_well->count++ && power_well->set) { - hsw_disable_package_c8(dev_priv); - power_well->set(dev_priv, power_well, true); - } -} - -static void __intel_power_well_put(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - WARN_ON(!power_well->count); - - if (!--power_well->count && power_well->set && - i915.disable_power_well) { - power_well->set(dev_priv, power_well, false); - hsw_enable_package_c8(dev_priv); - } -} - void intel_display_power_get(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain) { @@ -5399,7 +5378,10 @@ void intel_display_power_get(struct drm_i915_private *dev_priv, mutex_lock(&power_domains->lock); for_each_power_well(i, power_well, BIT(domain), power_domains) - __intel_power_well_get(dev_priv, power_well); + if (!power_well->count++ && power_well->set) { + hsw_disable_package_c8(dev_priv); + power_well->set(dev_priv, power_well, true); + } power_domains->domain_use_count[domain]++; @@ -5420,8 +5402,15 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, WARN_ON(!power_domains->domain_use_count[domain]); power_domains->domain_use_count[domain]--; - for_each_power_well_rev(i, power_well, BIT(domain), power_domains) - __intel_power_well_put(dev_priv, power_well); + for_each_power_well_rev(i, power_well, BIT(domain), power_domains) { + WARN_ON(!power_well->count); + + if (!--power_well->count && power_well->set && + i915.disable_power_well) { + power_well->set(dev_priv, power_well, false); + hsw_enable_package_c8(dev_priv); + } + } mutex_unlock(&power_domains->lock); } From 77d22dcacddb929bc700370d0fc079447c47fd89 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 5 Mar 2014 16:20:52 +0200 Subject: [PATCH 087/107] drm/i915: move modeset_update_power_wells earlier These functions will be needed by the valleyview specific power well update functionality added in an upcoming patch, so move them earlier. No functional change. v2: - no change v3: - rebase on latest -nightly Signed-off-by: Imre Deak Reviewed-by: Jesse Barnes (v2) Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 140 +++++++++++++-------------- 1 file changed, 70 insertions(+), 70 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 91c2e3b063964..ee786c5850267 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3958,6 +3958,76 @@ static void i9xx_pfit_enable(struct intel_crtc *crtc) I915_WRITE(BCLRPAT(crtc->pipe), 0); } +#define for_each_power_domain(domain, mask) \ + for ((domain) = 0; (domain) < POWER_DOMAIN_NUM; (domain)++) \ + if ((1 << (domain)) & (mask)) + +static unsigned long get_pipe_power_domains(struct drm_device *dev, + enum pipe pipe, bool pfit_enabled) +{ + unsigned long mask; + enum transcoder transcoder; + + transcoder = intel_pipe_to_cpu_transcoder(dev->dev_private, pipe); + + mask = BIT(POWER_DOMAIN_PIPE(pipe)); + mask |= BIT(POWER_DOMAIN_TRANSCODER(transcoder)); + if (pfit_enabled) + mask |= BIT(POWER_DOMAIN_PIPE_PANEL_FITTER(pipe)); + + return mask; +} + +void intel_display_set_init_power(struct drm_i915_private *dev_priv, + bool enable) +{ + if (dev_priv->power_domains.init_power_on == enable) + return; + + if (enable) + intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); + else + intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); + + dev_priv->power_domains.init_power_on = enable; +} + +static void modeset_update_crtc_power_domains(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + unsigned long pipe_domains[I915_MAX_PIPES] = { 0, }; + struct intel_crtc *crtc; + + /* + * First get all needed power domains, then put all unneeded, to avoid + * any unnecessary toggling of the power wells. + */ + list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) { + enum intel_display_power_domain domain; + + if (!crtc->base.enabled) + continue; + + pipe_domains[crtc->pipe] = get_pipe_power_domains(dev, + crtc->pipe, + crtc->config.pch_pfit.enabled); + + for_each_power_domain(domain, pipe_domains[crtc->pipe]) + intel_display_power_get(dev_priv, domain); + } + + list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) { + enum intel_display_power_domain domain; + + for_each_power_domain(domain, crtc->enabled_power_domains) + intel_display_power_put(dev_priv, domain); + + crtc->enabled_power_domains = pipe_domains[crtc->pipe]; + } + + intel_display_set_init_power(dev_priv, false); +} + int valleyview_get_vco(struct drm_i915_private *dev_priv) { int hpll_freq, vco_freq[] = { 800, 1600, 2000, 2400 }; @@ -6817,76 +6887,6 @@ static void hsw_update_package_c8(struct drm_device *dev) mutex_unlock(&dev_priv->pc8.lock); } -#define for_each_power_domain(domain, mask) \ - for ((domain) = 0; (domain) < POWER_DOMAIN_NUM; (domain)++) \ - if ((1 << (domain)) & (mask)) - -static unsigned long get_pipe_power_domains(struct drm_device *dev, - enum pipe pipe, bool pfit_enabled) -{ - unsigned long mask; - enum transcoder transcoder; - - transcoder = intel_pipe_to_cpu_transcoder(dev->dev_private, pipe); - - mask = BIT(POWER_DOMAIN_PIPE(pipe)); - mask |= BIT(POWER_DOMAIN_TRANSCODER(transcoder)); - if (pfit_enabled) - mask |= BIT(POWER_DOMAIN_PIPE_PANEL_FITTER(pipe)); - - return mask; -} - -void intel_display_set_init_power(struct drm_i915_private *dev_priv, - bool enable) -{ - if (dev_priv->power_domains.init_power_on == enable) - return; - - if (enable) - intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); - else - intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); - - dev_priv->power_domains.init_power_on = enable; -} - -static void modeset_update_crtc_power_domains(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - unsigned long pipe_domains[I915_MAX_PIPES] = { 0, }; - struct intel_crtc *crtc; - - /* - * First get all needed power domains, then put all unneeded, to avoid - * any unnecessary toggling of the power wells. - */ - list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) { - enum intel_display_power_domain domain; - - if (!crtc->base.enabled) - continue; - - pipe_domains[crtc->pipe] = get_pipe_power_domains(dev, - crtc->pipe, - crtc->config.pch_pfit.enabled); - - for_each_power_domain(domain, pipe_domains[crtc->pipe]) - intel_display_power_get(dev_priv, domain); - } - - list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) { - enum intel_display_power_domain domain; - - for_each_power_domain(domain, crtc->enabled_power_domains) - intel_display_power_put(dev_priv, domain); - - crtc->enabled_power_domains = pipe_domains[crtc->pipe]; - } - - intel_display_set_init_power(dev_priv, false); -} - static void haswell_modeset_global_resources(struct drm_device *dev) { modeset_update_crtc_power_domains(dev); From 93a25a9e2d67765c3092bfaac9b855d95e39df97 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 6 Mar 2014 09:40:43 +0100 Subject: [PATCH 088/107] drm/i915: Disable full ppgtt by default There are too many oustanding issues: - Fence handling in the current code is broken. There's a patch series from me, but it's blocked on and extended review (which includes writing the testcases). - IOMMU mapping handling is broken, we need to properly refcount it - currently it gets destroyed when the first vma is unbound, so way too early. - There's a pending reset issue on snb. Since Mika's reset work and full ppgtt have been pulled in in separate branches and ended up intermittingly breaking each another it's unclear who's the exact culprit here. - We still have persistent evidince of crazy recursion bugs through vma_unbind and ppgtt_relase, e.g. https://bugs.freedesktop.org/show_bug.cgi?id=73383 This issue (and a few others meanwhile resolved) have blocked our performance measuring/tuning group since 3 months. - Secure batch dispatching is broken. This is blocking Brad Volkin's command checker work since 3 months. All these issues are confirmed to only happen when full ppgtt is enabled, falling back to aliasing ppgtt resolves them. But even aliasing ppgtt itself still has a regression: - We currently unconditionally bind objects into the aliasing ppgtt, which means all priviledged objects like ringbuffers are visible to unpriviledged access again. On top of that this also breaks the command checker for aliasing ppgtt, since it can't hide the validated batch any more. Furthermore topic/full-ppgtt has never been reviewed: - Lifetime rules around vma unbinding/release are unclear, resulting into this awesome hack called ppgtt_release. Which seems to take the blame for most of the recursion fallout. - Context/ring init works different on gpu reset than anywhere else. Such differeneces have in the past always lead to really hard to track down bugs. - Aliasing ppgtt is treated in a bunch of places as a real address space, but it isn't - the real address space is always the global gtt in that case. This results in a bit a mess between contexts and ppgtt object, further complication the context/ppgtt/vma lifetime rules. - We don't have any docs describing the overall concepts introduced with full ppgtt. A short, concise overview describing vmas and some of the strange bits around them (like the unbound vmas used by execbuf, or the new binding rules) really is needed. Note that a lot of the post topic/full-ppgtt merge fallout has already been addressed, this entire list here of 10 issues really only contains the still outstanding issues. Finally the 3.15 merge window is approaching and I think we need to use the remaining time to ensure that our fallback option of using aliasing ppgtt is in solid shape. Hence I think it's time to throw the switch. While at it demote the helper from static inline status because really. Cc: Ben Widawsky Cc: Dave Airlie Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 22 +--------------------- drivers/gpu/drm/i915/i915_gem_gtt.c | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index dce09fcddc2c7..843aaee8d80bd 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2377,27 +2377,7 @@ static inline void i915_gem_chipset_flush(struct drm_device *dev) intel_gtt_chipset_flush(); } int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt); -static inline bool intel_enable_ppgtt(struct drm_device *dev, bool full) -{ - if (i915.enable_ppgtt == 0 || !HAS_ALIASING_PPGTT(dev)) - return false; - - if (i915.enable_ppgtt == 1 && full) - return false; - -#ifdef CONFIG_INTEL_IOMMU - /* Disable ppgtt on SNB if VT-d is on. */ - if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) { - DRM_INFO("Disabling PPGTT because VT-d is on\n"); - return false; - } -#endif - - if (full) - return HAS_PPGTT(dev); - else - return HAS_ALIASING_PPGTT(dev); -} +bool intel_enable_ppgtt(struct drm_device *dev, bool full); /* i915_gem_stolen.c */ int i915_gem_init_stolen(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index a616cac0f1610..3d8bd62a926cd 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -30,6 +30,29 @@ #include "i915_trace.h" #include "intel_drv.h" +bool intel_enable_ppgtt(struct drm_device *dev, bool full) +{ + if (i915.enable_ppgtt == 0 || !HAS_ALIASING_PPGTT(dev)) + return false; + + if (i915.enable_ppgtt == 1 && full) + return false; + +#ifdef CONFIG_INTEL_IOMMU + /* Disable ppgtt on SNB if VT-d is on. */ + if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) { + DRM_INFO("Disabling PPGTT because VT-d is on\n"); + return false; + } +#endif + + /* Full ppgtt disabled by default for now due to issues. */ + if (full) + return false; /* HAS_PPGTT(dev) */ + else + return HAS_ALIASING_PPGTT(dev); +} + #define GEN6_PPGTT_PD_ENTRIES 512 #define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t)) typedef uint64_t gen8_gtt_pte_t; From efcad91742a37aca0a8d33441c86f51bb0a90daa Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 4 Mar 2014 19:22:53 +0200 Subject: [PATCH 089/107] drm/i915: move power domain macros to intel_pm.c These macros are used only locally, so move them to the .c file. No functional change. v2: - add init power domain to always-on power wells in the following - separate - patch (Paulo) Signed-off-by: Imre Deak Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 10 ---------- drivers/gpu/drm/i915/intel_pm.c | 20 ++++++++++++++++++-- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 843aaee8d80bd..857871b763057 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -121,8 +121,6 @@ enum intel_display_power_domain { POWER_DOMAIN_NUM, }; -#define POWER_DOMAIN_MASK (BIT(POWER_DOMAIN_NUM) - 1) - #define POWER_DOMAIN_PIPE(pipe) ((pipe) + POWER_DOMAIN_PIPE_A) #define POWER_DOMAIN_PIPE_PANEL_FITTER(pipe) \ ((pipe) + POWER_DOMAIN_PIPE_A_PANEL_FITTER) @@ -130,14 +128,6 @@ enum intel_display_power_domain { ((tran) == TRANSCODER_EDP ? POWER_DOMAIN_TRANSCODER_EDP : \ (tran) + POWER_DOMAIN_TRANSCODER_A) -#define HSW_ALWAYS_ON_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PIPE_A) | \ - BIT(POWER_DOMAIN_TRANSCODER_EDP)) -#define BDW_ALWAYS_ON_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PIPE_A) | \ - BIT(POWER_DOMAIN_TRANSCODER_EDP) | \ - BIT(POWER_DOMAIN_PIPE_A_PANEL_FITTER)) - enum hpd_pin { HPD_NONE = 0, HPD_PORT_A = HPD_NONE, /* PORT_A is internal */ diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 6f231890b3954..fa7b2275fb930 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5445,6 +5445,22 @@ void i915_release_power_well(void) } EXPORT_SYMBOL_GPL(i915_release_power_well); +#define POWER_DOMAIN_MASK (BIT(POWER_DOMAIN_NUM) - 1) + +#define HSW_ALWAYS_ON_POWER_DOMAINS ( \ + BIT(POWER_DOMAIN_PIPE_A) | \ + BIT(POWER_DOMAIN_TRANSCODER_EDP)) +#define HSW_DISPLAY_POWER_DOMAINS ( \ + (POWER_DOMAIN_MASK & ~HSW_ALWAYS_ON_POWER_DOMAINS) | \ + BIT(POWER_DOMAIN_INIT)) + +#define BDW_ALWAYS_ON_POWER_DOMAINS ( \ + HSW_ALWAYS_ON_POWER_DOMAINS | \ + BIT(POWER_DOMAIN_PIPE_A_PANEL_FITTER)) +#define BDW_DISPLAY_POWER_DOMAINS ( \ + (POWER_DOMAIN_MASK & ~BDW_ALWAYS_ON_POWER_DOMAINS) | \ + BIT(POWER_DOMAIN_INIT)) + static struct i915_power_well i9xx_always_on_power_well[] = { { .name = "always-on", @@ -5461,7 +5477,7 @@ static struct i915_power_well hsw_power_wells[] = { }, { .name = "display", - .domains = POWER_DOMAIN_MASK & ~HSW_ALWAYS_ON_POWER_DOMAINS, + .domains = HSW_DISPLAY_POWER_DOMAINS, .is_enabled = hsw_power_well_enabled, .set = hsw_set_power_well, }, @@ -5475,7 +5491,7 @@ static struct i915_power_well bdw_power_wells[] = { }, { .name = "display", - .domains = POWER_DOMAIN_MASK & ~BDW_ALWAYS_ON_POWER_DOMAINS, + .domains = BDW_DISPLAY_POWER_DOMAINS, .is_enabled = hsw_power_well_enabled, .set = hsw_set_power_well, }, From f5938f363535b1723f81bd5debcb7ce5161ece95 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 4 Mar 2014 19:22:54 +0200 Subject: [PATCH 090/107] drm/i915: add init power domain to always-on power wells Whenever we request a power domain it has to guarantee that all HW resources are enabled that are needed to access a HW register associated with that power domain. In case a register is on an always-on power well this won't result in turning on a power well, but it may require enabling some other HW resource. One such resource is the HSW/BDW device D0 state that is required for all register accesses and thus for all power wells/power domains. So far the init power domain (guaranteeing access to all HW registers) was part of the default i9xx always-on power well, but not the HSW/BDW always-on power wells. Add the domain to the latter power wells too. Atm, all the always-on power wells have noop handlers, so this doesn't change the functionality. v2: - clarify semantics of always-on power wells (Paulo) Signed-off-by: Imre Deak Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index fa7b2275fb930..67a87f907999d 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5449,7 +5449,8 @@ EXPORT_SYMBOL_GPL(i915_release_power_well); #define HSW_ALWAYS_ON_POWER_DOMAINS ( \ BIT(POWER_DOMAIN_PIPE_A) | \ - BIT(POWER_DOMAIN_TRANSCODER_EDP)) + BIT(POWER_DOMAIN_TRANSCODER_EDP) | \ + BIT(POWER_DOMAIN_INIT)) #define HSW_DISPLAY_POWER_DOMAINS ( \ (POWER_DOMAIN_MASK & ~HSW_ALWAYS_ON_POWER_DOMAINS) | \ BIT(POWER_DOMAIN_INIT)) From c6cb582e6cf7b2e7ecb9668f53bd4fe6295bee82 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 4 Mar 2014 19:22:55 +0200 Subject: [PATCH 091/107] drm/i915: split power well 'set' handler to separate enable/disable/sync_hw Split the 'set' power well handler into an 'enable', 'disable' and 'sync_hw' handler. This maps more conveniently to higher level operations, for example it allows us to push the hsw package c8 handling into the corresponding hsw/bdw enable/disable handlers and the hsw BIOS hand-over setting into the hsw/bdw sync_hw handler. No functional change. Signed-off-by: Imre Deak Reviewed-by: Jesse Barnes [danvet: Appease checkpatch's whitespace complaints.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 35 ++++++++++++++-- drivers/gpu/drm/i915/intel_pm.c | 73 ++++++++++++++++++++++----------- 2 files changed, 80 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 857871b763057..9e26103af07aa 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1014,6 +1014,36 @@ struct intel_ilk_power_mgmt { struct drm_i915_gem_object *renderctx; }; +struct drm_i915_private; +struct i915_power_well; + +struct i915_power_well_ops { + /* + * Synchronize the well's hw state to match the current sw state, for + * example enable/disable it based on the current refcount. Called + * during driver init and resume time, possibly after first calling + * the enable/disable handlers. + */ + void (*sync_hw)(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well); + /* + * Enable the well and resources that depend on it (for example + * interrupts located on the well). Called after the 0->1 refcount + * transition. + */ + void (*enable)(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well); + /* + * Disable the well and resources that depend on it. Called after + * the 1->0 refcount transition. + */ + void (*disable)(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well); + /* Returns the hw enabled state. */ + bool (*is_enabled)(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well); +}; + /* Power well structure for haswell */ struct i915_power_well { const char *name; @@ -1022,10 +1052,7 @@ struct i915_power_well { int count; unsigned long domains; void *data; - void (*set)(struct drm_i915_private *dev_priv, struct i915_power_well *power_well, - bool enable); - bool (*is_enabled)(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well); + const struct i915_power_well_ops *ops; }; struct i915_power_domains { diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 67a87f907999d..c27efc25aa4ac 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5258,7 +5258,7 @@ bool intel_display_power_enabled(struct drm_i915_private *dev_priv, if (power_well->always_on) continue; - if (!power_well->is_enabled(dev_priv, power_well)) { + if (!power_well->ops->is_enabled(dev_priv, power_well)) { is_enabled = false; break; } @@ -5366,6 +5366,33 @@ static void hsw_set_power_well(struct drm_i915_private *dev_priv, } } +static void hsw_power_well_sync_hw(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ + hsw_set_power_well(dev_priv, power_well, power_well->count > 0); + + /* + * We're taking over the BIOS, so clear any requests made by it since + * the driver is in charge now. + */ + if (I915_READ(HSW_PWR_WELL_BIOS) & HSW_PWR_WELL_ENABLE_REQUEST) + I915_WRITE(HSW_PWR_WELL_BIOS, 0); +} + +static void hsw_power_well_enable(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ + hsw_disable_package_c8(dev_priv); + hsw_set_power_well(dev_priv, power_well, true); +} + +static void hsw_power_well_disable(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ + hsw_set_power_well(dev_priv, power_well, false); + hsw_enable_package_c8(dev_priv); +} + void intel_display_power_get(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain) { @@ -5378,10 +5405,8 @@ void intel_display_power_get(struct drm_i915_private *dev_priv, mutex_lock(&power_domains->lock); for_each_power_well(i, power_well, BIT(domain), power_domains) - if (!power_well->count++ && power_well->set) { - hsw_disable_package_c8(dev_priv); - power_well->set(dev_priv, power_well, true); - } + if (!power_well->count++ && power_well->ops->enable) + power_well->ops->enable(dev_priv, power_well); power_domains->domain_use_count[domain]++; @@ -5405,11 +5430,9 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, for_each_power_well_rev(i, power_well, BIT(domain), power_domains) { WARN_ON(!power_well->count); - if (!--power_well->count && power_well->set && - i915.disable_power_well) { - power_well->set(dev_priv, power_well, false); - hsw_enable_package_c8(dev_priv); - } + if (!--power_well->count && power_well->ops->disable && + i915.disable_power_well) + power_well->ops->disable(dev_priv, power_well); } mutex_unlock(&power_domains->lock); @@ -5462,25 +5485,35 @@ EXPORT_SYMBOL_GPL(i915_release_power_well); (POWER_DOMAIN_MASK & ~BDW_ALWAYS_ON_POWER_DOMAINS) | \ BIT(POWER_DOMAIN_INIT)) +static const struct i915_power_well_ops i9xx_always_on_power_well_ops = { }; + static struct i915_power_well i9xx_always_on_power_well[] = { { .name = "always-on", .always_on = 1, .domains = POWER_DOMAIN_MASK, + .ops = &i9xx_always_on_power_well_ops, }, }; +static const struct i915_power_well_ops hsw_power_well_ops = { + .sync_hw = hsw_power_well_sync_hw, + .enable = hsw_power_well_enable, + .disable = hsw_power_well_disable, + .is_enabled = hsw_power_well_enabled, +}; + static struct i915_power_well hsw_power_wells[] = { { .name = "always-on", .always_on = 1, .domains = HSW_ALWAYS_ON_POWER_DOMAINS, + .ops = &i9xx_always_on_power_well_ops, }, { .name = "display", .domains = HSW_DISPLAY_POWER_DOMAINS, - .is_enabled = hsw_power_well_enabled, - .set = hsw_set_power_well, + .ops = &hsw_power_well_ops, }, }; @@ -5489,12 +5522,12 @@ static struct i915_power_well bdw_power_wells[] = { .name = "always-on", .always_on = 1, .domains = BDW_ALWAYS_ON_POWER_DOMAINS, + .ops = &i9xx_always_on_power_well_ops, }, { .name = "display", .domains = BDW_DISPLAY_POWER_DOMAINS, - .is_enabled = hsw_power_well_enabled, - .set = hsw_set_power_well, + .ops = &hsw_power_well_ops, }, }; @@ -5539,8 +5572,8 @@ static void intel_power_domains_resume(struct drm_i915_private *dev_priv) mutex_lock(&power_domains->lock); for_each_power_well(i, power_well, POWER_DOMAIN_MASK, power_domains) { - if (power_well->set) - power_well->set(dev_priv, power_well, power_well->count > 0); + if (power_well->ops->sync_hw) + power_well->ops->sync_hw(dev_priv, power_well); } mutex_unlock(&power_domains->lock); } @@ -5550,14 +5583,6 @@ void intel_power_domains_init_hw(struct drm_i915_private *dev_priv) /* For now, we need the power well to be always enabled. */ intel_display_set_init_power(dev_priv, true); intel_power_domains_resume(dev_priv); - - if (!(IS_HASWELL(dev_priv->dev) || IS_BROADWELL(dev_priv->dev))) - return; - - /* We're taking over the BIOS, so clear any requests made by it since - * the driver is in charge now. */ - if (I915_READ(HSW_PWR_WELL_BIOS) & HSW_PWR_WELL_ENABLE_REQUEST) - I915_WRITE(HSW_PWR_WELL_BIOS, 0); } /* Disables PC8 so we can use the GMBUS and DP AUX interrupts. */ From a45f4466e4e160e6ce5332895710d3d881a6a51c Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 4 Mar 2014 19:22:56 +0200 Subject: [PATCH 092/107] drm/i915: add noop power well handlers instead of NULL checking them Reading code free of special cases wins over the small overhead of calling a noop handler. Suggested by Jesse. Signed-off-by: Imre Deak Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index c27efc25aa4ac..37f162132ae68 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5393,6 +5393,17 @@ static void hsw_power_well_disable(struct drm_i915_private *dev_priv, hsw_enable_package_c8(dev_priv); } +static void i9xx_always_on_power_well_noop(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ +} + +static bool i9xx_always_on_power_well_enabled(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ + return true; +} + void intel_display_power_get(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain) { @@ -5405,7 +5416,7 @@ void intel_display_power_get(struct drm_i915_private *dev_priv, mutex_lock(&power_domains->lock); for_each_power_well(i, power_well, BIT(domain), power_domains) - if (!power_well->count++ && power_well->ops->enable) + if (!power_well->count++) power_well->ops->enable(dev_priv, power_well); power_domains->domain_use_count[domain]++; @@ -5430,8 +5441,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, for_each_power_well_rev(i, power_well, BIT(domain), power_domains) { WARN_ON(!power_well->count); - if (!--power_well->count && power_well->ops->disable && - i915.disable_power_well) + if (!--power_well->count && i915.disable_power_well) power_well->ops->disable(dev_priv, power_well); } @@ -5485,7 +5495,12 @@ EXPORT_SYMBOL_GPL(i915_release_power_well); (POWER_DOMAIN_MASK & ~BDW_ALWAYS_ON_POWER_DOMAINS) | \ BIT(POWER_DOMAIN_INIT)) -static const struct i915_power_well_ops i9xx_always_on_power_well_ops = { }; +static const struct i915_power_well_ops i9xx_always_on_power_well_ops = { + .sync_hw = i9xx_always_on_power_well_noop, + .enable = i9xx_always_on_power_well_noop, + .disable = i9xx_always_on_power_well_noop, + .is_enabled = i9xx_always_on_power_well_enabled, +}; static struct i915_power_well i9xx_always_on_power_well[] = { { @@ -5571,10 +5586,8 @@ static void intel_power_domains_resume(struct drm_i915_private *dev_priv) int i; mutex_lock(&power_domains->lock); - for_each_power_well(i, power_well, POWER_DOMAIN_MASK, power_domains) { - if (power_well->ops->sync_hw) - power_well->ops->sync_hw(dev_priv, power_well); - } + for_each_power_well(i, power_well, POWER_DOMAIN_MASK, power_domains) + power_well->ops->sync_hw(dev_priv, power_well); mutex_unlock(&power_domains->lock); } From 319be8ae8aec7550371ac58f0fd29e9e51207b5b Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 4 Mar 2014 19:22:57 +0200 Subject: [PATCH 093/107] drm/i915: add port power domains Parts that poke port specific HW blocks like the encoder HW state readout or connector hotplug detect code need a way to check whether required power domains are on or enable/disable these. For this purpose add a set of power domains that refer to the port HW blocks. Get the proper port power domains during modeset. For now when requesting the power domain for a DDI port get it for a 4 lane configuration. This can be optimized later to request only the 2 lane power domain, when proper support is added on the VLV PHY side for this. Atm, the PHY setup code assumes a 4 lane config in all cases. Signed-off-by: Imre Deak Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 22 ++++++++++++ drivers/gpu/drm/i915/i915_drv.h | 11 ++++++ drivers/gpu/drm/i915/intel_display.c | 51 +++++++++++++++++++++++++--- drivers/gpu/drm/i915/intel_drv.h | 2 ++ drivers/gpu/drm/i915/intel_pm.c | 9 +++++ 5 files changed, 90 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index fbcf536924e0f..a90d31c786437 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2048,6 +2048,28 @@ static const char *power_domain_str(enum intel_display_power_domain domain) return "TRANSCODER_C"; case POWER_DOMAIN_TRANSCODER_EDP: return "TRANSCODER_EDP"; + case POWER_DOMAIN_PORT_DDI_A_2_LANES: + return "PORT_DDI_A_2_LANES"; + case POWER_DOMAIN_PORT_DDI_A_4_LANES: + return "PORT_DDI_A_4_LANES"; + case POWER_DOMAIN_PORT_DDI_B_2_LANES: + return "PORT_DDI_B_2_LANES"; + case POWER_DOMAIN_PORT_DDI_B_4_LANES: + return "PORT_DDI_B_4_LANES"; + case POWER_DOMAIN_PORT_DDI_C_2_LANES: + return "PORT_DDI_C_2_LANES"; + case POWER_DOMAIN_PORT_DDI_C_4_LANES: + return "PORT_DDI_C_4_LANES"; + case POWER_DOMAIN_PORT_DDI_D_2_LANES: + return "PORT_DDI_D_2_LANES"; + case POWER_DOMAIN_PORT_DDI_D_4_LANES: + return "PORT_DDI_D_4_LANES"; + case POWER_DOMAIN_PORT_DSI: + return "PORT_DSI"; + case POWER_DOMAIN_PORT_CRT: + return "PORT_CRT"; + case POWER_DOMAIN_PORT_OTHER: + return "PORT_OTHER"; case POWER_DOMAIN_VGA: return "VGA"; case POWER_DOMAIN_AUDIO: diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9e26103af07aa..9387c56647ba7 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -114,6 +114,17 @@ enum intel_display_power_domain { POWER_DOMAIN_TRANSCODER_B, POWER_DOMAIN_TRANSCODER_C, POWER_DOMAIN_TRANSCODER_EDP, + POWER_DOMAIN_PORT_DDI_A_2_LANES, + POWER_DOMAIN_PORT_DDI_A_4_LANES, + POWER_DOMAIN_PORT_DDI_B_2_LANES, + POWER_DOMAIN_PORT_DDI_B_4_LANES, + POWER_DOMAIN_PORT_DDI_C_2_LANES, + POWER_DOMAIN_PORT_DDI_C_4_LANES, + POWER_DOMAIN_PORT_DDI_D_2_LANES, + POWER_DOMAIN_PORT_DDI_D_4_LANES, + POWER_DOMAIN_PORT_DSI, + POWER_DOMAIN_PORT_CRT, + POWER_DOMAIN_PORT_OTHER, POWER_DOMAIN_VGA, POWER_DOMAIN_AUDIO, POWER_DOMAIN_INIT, diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ee786c5850267..414da19499f9e 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3962,9 +3962,49 @@ static void i9xx_pfit_enable(struct intel_crtc *crtc) for ((domain) = 0; (domain) < POWER_DOMAIN_NUM; (domain)++) \ if ((1 << (domain)) & (mask)) -static unsigned long get_pipe_power_domains(struct drm_device *dev, - enum pipe pipe, bool pfit_enabled) +enum intel_display_power_domain +intel_display_port_power_domain(struct intel_encoder *intel_encoder) +{ + struct drm_device *dev = intel_encoder->base.dev; + struct intel_digital_port *intel_dig_port; + + switch (intel_encoder->type) { + case INTEL_OUTPUT_UNKNOWN: + /* Only DDI platforms should ever use this output type */ + WARN_ON_ONCE(!HAS_DDI(dev)); + case INTEL_OUTPUT_DISPLAYPORT: + case INTEL_OUTPUT_HDMI: + case INTEL_OUTPUT_EDP: + intel_dig_port = enc_to_dig_port(&intel_encoder->base); + switch (intel_dig_port->port) { + case PORT_A: + return POWER_DOMAIN_PORT_DDI_A_4_LANES; + case PORT_B: + return POWER_DOMAIN_PORT_DDI_B_4_LANES; + case PORT_C: + return POWER_DOMAIN_PORT_DDI_C_4_LANES; + case PORT_D: + return POWER_DOMAIN_PORT_DDI_D_4_LANES; + default: + WARN_ON_ONCE(1); + return POWER_DOMAIN_PORT_OTHER; + } + case INTEL_OUTPUT_ANALOG: + return POWER_DOMAIN_PORT_CRT; + case INTEL_OUTPUT_DSI: + return POWER_DOMAIN_PORT_DSI; + default: + return POWER_DOMAIN_PORT_OTHER; + } +} + +static unsigned long get_crtc_power_domains(struct drm_crtc *crtc) { + struct drm_device *dev = crtc->dev; + struct intel_encoder *intel_encoder; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + enum pipe pipe = intel_crtc->pipe; + bool pfit_enabled = intel_crtc->config.pch_pfit.enabled; unsigned long mask; enum transcoder transcoder; @@ -3975,6 +4015,9 @@ static unsigned long get_pipe_power_domains(struct drm_device *dev, if (pfit_enabled) mask |= BIT(POWER_DOMAIN_PIPE_PANEL_FITTER(pipe)); + for_each_encoder_on_crtc(dev, crtc, intel_encoder) + mask |= BIT(intel_display_port_power_domain(intel_encoder)); + return mask; } @@ -4008,9 +4051,7 @@ static void modeset_update_crtc_power_domains(struct drm_device *dev) if (!crtc->base.enabled) continue; - pipe_domains[crtc->pipe] = get_pipe_power_domains(dev, - crtc->pipe, - crtc->config.pch_pfit.enabled); + pipe_domains[crtc->pipe] = get_crtc_power_domains(&crtc->base); for_each_power_domain(domain, pipe_domains[crtc->pipe]) intel_display_power_get(dev_priv, domain); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 60427970caedb..e31eb1ec5b79e 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -733,6 +733,8 @@ bool intel_crtc_active(struct drm_crtc *crtc); void hsw_enable_ips(struct intel_crtc *crtc); void hsw_disable_ips(struct intel_crtc *crtc); void intel_display_set_init_power(struct drm_i915_private *dev, bool enable); +enum intel_display_power_domain +intel_display_port_power_domain(struct intel_encoder *intel_encoder); int valleyview_get_vco(struct drm_i915_private *dev_priv); void intel_mode_from_pipe_config(struct drm_display_mode *mode, struct intel_crtc_config *pipe_config); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 37f162132ae68..a4c0ff181d07c 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5483,6 +5483,15 @@ EXPORT_SYMBOL_GPL(i915_release_power_well); #define HSW_ALWAYS_ON_POWER_DOMAINS ( \ BIT(POWER_DOMAIN_PIPE_A) | \ BIT(POWER_DOMAIN_TRANSCODER_EDP) | \ + BIT(POWER_DOMAIN_PORT_DDI_A_2_LANES) | \ + BIT(POWER_DOMAIN_PORT_DDI_A_4_LANES) | \ + BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) | \ + BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) | \ + BIT(POWER_DOMAIN_PORT_DDI_C_2_LANES) | \ + BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) | \ + BIT(POWER_DOMAIN_PORT_DDI_D_2_LANES) | \ + BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) | \ + BIT(POWER_DOMAIN_PORT_CRT) | \ BIT(POWER_DOMAIN_INIT)) #define HSW_DISPLAY_POWER_DOMAINS ( \ (POWER_DOMAIN_MASK & ~HSW_ALWAYS_ON_POWER_DOMAINS) | \ From 671dedd212cdb5e64ce95b5445f1587556331ea5 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 5 Mar 2014 16:20:53 +0200 Subject: [PATCH 094/107] drm/i915: get port power domain in connector detect handlers The connector detect and get_mode handlers need to access the port specific HW blocks to read the EDID etc. Get/put the port power domains around these handlers. v2: - get port power domain for HDMI too (Ville) - get port power domain for the DP,HDMI audio detect handlers (Jesse) - Leave the intel_runtime_pm_get/put in the DP detect function in place. Instead of just removing them, these should be moved to the appropriate power_well enable/disable handlers. We can do this after Paulo's 'Merge PC8 with runtime PM, v2' patchset. v3: - rebased on latest -nightly Signed-off-by: Imre Deak Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_crt.c | 22 ++++++++++++++++++++-- drivers/gpu/drm/i915/intel_dp.c | 25 +++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_dsi.c | 13 ++++++++++++- drivers/gpu/drm/i915/intel_hdmi.c | 29 ++++++++++++++++++++++++++--- 4 files changed, 83 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 469071db35280..96e28b8ca45ca 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -636,6 +636,8 @@ intel_crt_detect(struct drm_connector *connector, bool force) struct drm_device *dev = connector->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crt *crt = intel_attached_crt(connector); + struct intel_encoder *intel_encoder = &crt->base; + enum intel_display_power_domain power_domain; enum drm_connector_status status; struct intel_load_detect_pipe tmp; @@ -645,6 +647,9 @@ intel_crt_detect(struct drm_connector *connector, bool force) connector->base.id, drm_get_connector_name(connector), force); + power_domain = intel_display_port_power_domain(intel_encoder); + intel_display_power_get(dev_priv, power_domain); + if (I915_HAS_HOTPLUG(dev)) { /* We can not rely on the HPD pin always being correctly wired * up, for example many KVM do not pass it through, and so @@ -688,7 +693,9 @@ intel_crt_detect(struct drm_connector *connector, bool force) status = connector_status_unknown; out: + intel_display_power_put(dev_priv, power_domain); intel_runtime_pm_put(dev_priv); + return status; } @@ -702,17 +709,28 @@ static int intel_crt_get_modes(struct drm_connector *connector) { struct drm_device *dev = connector->dev; struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_crt *crt = intel_attached_crt(connector); + struct intel_encoder *intel_encoder = &crt->base; + enum intel_display_power_domain power_domain; int ret; struct i2c_adapter *i2c; + power_domain = intel_display_port_power_domain(intel_encoder); + intel_display_power_get(dev_priv, power_domain); + i2c = intel_gmbus_get_adapter(dev_priv, dev_priv->vbt.crt_ddc_pin); ret = intel_crt_ddc_get_modes(connector, i2c); if (ret || !IS_G4X(dev)) - return ret; + goto out; /* Try to probe digital port for output in DVI-I -> VGA mode. */ i2c = intel_gmbus_get_adapter(dev_priv, GMBUS_PORT_DPB); - return intel_crt_ddc_get_modes(connector, i2c); + ret = intel_crt_ddc_get_modes(connector, i2c); + +out: + intel_display_power_put(dev_priv, power_domain); + + return ret; } static int intel_crt_set_property(struct drm_connector *connector, diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 62e8efedce52c..56edb0975db61 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -3218,10 +3218,14 @@ intel_dp_detect(struct drm_connector *connector, bool force) struct drm_device *dev = connector->dev; struct drm_i915_private *dev_priv = dev->dev_private; enum drm_connector_status status; + enum intel_display_power_domain power_domain; struct edid *edid = NULL; intel_runtime_pm_get(dev_priv); + power_domain = intel_display_port_power_domain(intel_encoder); + intel_display_power_get(dev_priv, power_domain); + DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id, drm_get_connector_name(connector)); @@ -3252,21 +3256,32 @@ intel_dp_detect(struct drm_connector *connector, bool force) status = connector_status_connected; out: + intel_display_power_put(dev_priv, power_domain); + intel_runtime_pm_put(dev_priv); + return status; } static int intel_dp_get_modes(struct drm_connector *connector) { struct intel_dp *intel_dp = intel_attached_dp(connector); + struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); + struct intel_encoder *intel_encoder = &intel_dig_port->base; struct intel_connector *intel_connector = to_intel_connector(connector); struct drm_device *dev = connector->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + enum intel_display_power_domain power_domain; int ret; /* We should parse the EDID data and find out if it has an audio sink */ + power_domain = intel_display_port_power_domain(intel_encoder); + intel_display_power_get(dev_priv, power_domain); + ret = intel_dp_get_edid_modes(connector, &intel_dp->adapter); + intel_display_power_put(dev_priv, power_domain); if (ret) return ret; @@ -3287,15 +3302,25 @@ static bool intel_dp_detect_audio(struct drm_connector *connector) { struct intel_dp *intel_dp = intel_attached_dp(connector); + struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); + struct intel_encoder *intel_encoder = &intel_dig_port->base; + struct drm_device *dev = connector->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + enum intel_display_power_domain power_domain; struct edid *edid; bool has_audio = false; + power_domain = intel_display_port_power_domain(intel_encoder); + intel_display_power_get(dev_priv, power_domain); + edid = intel_dp_get_edid(connector, &intel_dp->adapter); if (edid) { has_audio = drm_detect_monitor_audio(edid); kfree(edid); } + intel_display_power_put(dev_priv, power_domain); + return has_audio; } diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 3ee1db1407b05..63b95bbd1c07f 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -488,8 +488,19 @@ static enum drm_connector_status intel_dsi_detect(struct drm_connector *connector, bool force) { struct intel_dsi *intel_dsi = intel_attached_dsi(connector); + struct intel_encoder *intel_encoder = &intel_dsi->base; + enum intel_display_power_domain power_domain; + enum drm_connector_status connector_status; + struct drm_i915_private *dev_priv = intel_encoder->base.dev->dev_private; + DRM_DEBUG_KMS("\n"); - return intel_dsi->dev.dev_ops->detect(&intel_dsi->dev); + power_domain = intel_display_port_power_domain(intel_encoder); + + intel_display_power_get(dev_priv, power_domain); + connector_status = intel_dsi->dev.dev_ops->detect(&intel_dsi->dev); + intel_display_power_put(dev_priv, power_domain); + + return connector_status; } static int intel_dsi_get_modes(struct drm_connector *connector) diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 98d68ab04de46..ebccbbff0f15b 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -909,11 +909,15 @@ intel_hdmi_detect(struct drm_connector *connector, bool force) struct intel_encoder *intel_encoder = &intel_dig_port->base; struct drm_i915_private *dev_priv = dev->dev_private; struct edid *edid; + enum intel_display_power_domain power_domain; enum drm_connector_status status = connector_status_disconnected; DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id, drm_get_connector_name(connector)); + power_domain = intel_display_port_power_domain(intel_encoder); + intel_display_power_get(dev_priv, power_domain); + intel_hdmi->has_hdmi_sink = false; intel_hdmi->has_audio = false; intel_hdmi->rgb_quant_range_selectable = false; @@ -941,31 +945,48 @@ intel_hdmi_detect(struct drm_connector *connector, bool force) intel_encoder->type = INTEL_OUTPUT_HDMI; } + intel_display_power_put(dev_priv, power_domain); + return status; } static int intel_hdmi_get_modes(struct drm_connector *connector) { - struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector); + struct intel_encoder *intel_encoder = intel_attached_encoder(connector); + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&intel_encoder->base); struct drm_i915_private *dev_priv = connector->dev->dev_private; + enum intel_display_power_domain power_domain; + int ret; /* We should parse the EDID data and find out if it's an HDMI sink so * we can send audio to it. */ - return intel_ddc_get_modes(connector, + power_domain = intel_display_port_power_domain(intel_encoder); + intel_display_power_get(dev_priv, power_domain); + + ret = intel_ddc_get_modes(connector, intel_gmbus_get_adapter(dev_priv, intel_hdmi->ddc_bus)); + + intel_display_power_put(dev_priv, power_domain); + + return ret; } static bool intel_hdmi_detect_audio(struct drm_connector *connector) { - struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector); + struct intel_encoder *intel_encoder = intel_attached_encoder(connector); + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&intel_encoder->base); struct drm_i915_private *dev_priv = connector->dev->dev_private; + enum intel_display_power_domain power_domain; struct edid *edid; bool has_audio = false; + power_domain = intel_display_port_power_domain(intel_encoder); + intel_display_power_get(dev_priv, power_domain); + edid = drm_get_edid(connector, intel_gmbus_get_adapter(dev_priv, intel_hdmi->ddc_bus)); @@ -975,6 +996,8 @@ intel_hdmi_detect_audio(struct drm_connector *connector) kfree(edid); } + intel_display_power_put(dev_priv, power_domain); + return has_audio; } From 6d129beac7099bddad368b6a02e8e0a67f59e9b8 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 5 Mar 2014 16:20:54 +0200 Subject: [PATCH 095/107] drm/i915: check port power domain when reading the encoder hw state Since the encoder is tied to its port, we need to make sure the power domain for that port is on before reading out the encoder HW state. Note that this also covers also all connector get_hw_state handlers, since all those just call the corresponding encoder get_hw_state handler, which checks - after this change - for all power domains the connector needs. v2: - no change v3: - push down the power domain checks into the specific encoder get_hw_state handlers (Daniel) Signed-off-by: Imre Deak Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_crt.c | 5 +++++ drivers/gpu/drm/i915/intel_ddi.c | 5 +++++ drivers/gpu/drm/i915/intel_dp.c | 9 ++++++++- drivers/gpu/drm/i915/intel_dsi.c | 5 +++++ drivers/gpu/drm/i915/intel_hdmi.c | 5 +++++ 5 files changed, 28 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 96e28b8ca45ca..4ef6d69c078d3 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -68,8 +68,13 @@ static bool intel_crt_get_hw_state(struct intel_encoder *encoder, struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crt *crt = intel_encoder_to_crt(encoder); + enum intel_display_power_domain power_domain; u32 tmp; + power_domain = intel_display_port_power_domain(encoder); + if (!intel_display_power_enabled(dev_priv, power_domain)) + return false; + tmp = I915_READ(crt->adpa_reg); if (!(tmp & ADPA_DAC_ENABLE)) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 2643d3b8b67da..e2665e09d5dff 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1145,9 +1145,14 @@ bool intel_ddi_get_hw_state(struct intel_encoder *encoder, struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; enum port port = intel_ddi_get_encoder_port(encoder); + enum intel_display_power_domain power_domain; u32 tmp; int i; + power_domain = intel_display_port_power_domain(encoder); + if (!intel_display_power_enabled(dev_priv, power_domain)) + return false; + tmp = I915_READ(DDI_BUF_CTL(port)); if (!(tmp & DDI_BUF_CTL_ENABLE)) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 56edb0975db61..7584348b7e898 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1479,7 +1479,14 @@ static bool intel_dp_get_hw_state(struct intel_encoder *encoder, enum port port = dp_to_dig_port(intel_dp)->port; struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; - u32 tmp = I915_READ(intel_dp->output_reg); + enum intel_display_power_domain power_domain; + u32 tmp; + + power_domain = intel_display_port_power_domain(encoder); + if (!intel_display_power_enabled(dev_priv, power_domain)) + return false; + + tmp = I915_READ(intel_dp->output_reg); if (!(tmp & DP_PORT_EN)) return false; diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 63b95bbd1c07f..cf7322e95278a 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -243,11 +243,16 @@ static bool intel_dsi_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe) { struct drm_i915_private *dev_priv = encoder->base.dev->dev_private; + enum intel_display_power_domain power_domain; u32 port, func; enum pipe p; DRM_DEBUG_KMS("\n"); + power_domain = intel_display_port_power_domain(encoder); + if (!intel_display_power_enabled(dev_priv, power_domain)) + return false; + /* XXX: this only works for one DSI output */ for (p = PIPE_A; p <= PIPE_B; p++) { port = I915_READ(MIPI_PORT_CTRL(p)); diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index ebccbbff0f15b..f410cc03e08a0 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -667,8 +667,13 @@ static bool intel_hdmi_get_hw_state(struct intel_encoder *encoder, struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); + enum intel_display_power_domain power_domain; u32 tmp; + power_domain = intel_display_port_power_domain(encoder); + if (!intel_display_power_enabled(dev_priv, power_domain)) + return false; + tmp = I915_READ(intel_hdmi->hdmi_reg); if (!(tmp & SDVO_ENABLE)) From b5482bd0ffefadf314098d7fae445aac9f3a0411 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 5 Mar 2014 16:20:55 +0200 Subject: [PATCH 096/107] drm/i915: check pipe power domain when reading its hw state We can read out the pipe HW state only if the required power domain is on. If not we consider the pipe to be off. v2: - no change v3: - push down the power domain checks into the specific crtc get_pipe_config handlers (Daniel) Signed-off-by: Imre Deak Reviewed-by: Jesse Barnes [danvet: Appease checkpatch.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 414da19499f9e..66184423ae291 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5611,6 +5611,10 @@ static bool i9xx_get_pipe_config(struct intel_crtc *crtc, struct drm_i915_private *dev_priv = dev->dev_private; uint32_t tmp; + if (!intel_display_power_enabled(dev_priv, + POWER_DOMAIN_PIPE(crtc->pipe))) + return false; + pipe_config->cpu_transcoder = (enum transcoder) crtc->pipe; pipe_config->shared_dpll = DPLL_ID_PRIVATE; @@ -6981,6 +6985,10 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc, enum intel_display_power_domain pfit_domain; uint32_t tmp; + if (!intel_display_power_enabled(dev_priv, + POWER_DOMAIN_PIPE(crtc->pipe))) + return false; + pipe_config->cpu_transcoder = (enum transcoder) crtc->pipe; pipe_config->shared_dpll = DPLL_ID_PRIVATE; From 7f9e192f1b504ff377f836a14176f38c5717361f Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 4 Mar 2014 19:23:01 +0200 Subject: [PATCH 097/107] drm/i915: vlv: keep first level vblank IRQs masked This is a left-over from commit b7e634cc8dcd320123199a18bae0937b40dc28b8 Author: Imre Deak Date: Tue Feb 4 21:35:45 2014 +0200 drm/i915: vlv: don't unmask IIR[DISPLAY_PIPE_A/B_VBLANK] interrupt where we stopped unmasking the vblank IRQs, but left them enabled in the IER register. Disable them in IER too. v2: - remove comment becoming stale after this change (Ville) Signed-off-by: Imre Deak Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_irq.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 939139bc70b1f..4a19306b2d73b 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3035,17 +3035,9 @@ static int valleyview_irq_postinstall(struct drm_device *dev) enable_mask = I915_DISPLAY_PORT_INTERRUPT; enable_mask |= I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | - I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT | - I915_DISPLAY_PIPE_B_EVENT_INTERRUPT | - I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT; + I915_DISPLAY_PIPE_B_EVENT_INTERRUPT; - /* - *Leave vblank interrupts masked initially. enable/disable will - * toggle them based on usage. - */ - dev_priv->irq_mask = (~enable_mask) | - I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT | - I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT; + dev_priv->irq_mask = ~enable_mask; I915_WRITE(PORT_HOTPLUG_EN, 0); POSTING_READ(PORT_HOTPLUG_EN); From a30180a5a349709821725266caaa69ec9871efbe Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 4 Mar 2014 19:23:02 +0200 Subject: [PATCH 098/107] drm/i915: sanitize PUNIT register macro definitions In the upcoming patches we'll need to access the rest of the fields in the punit power gating register, so prepare for that. v2: - add doc reference for the power well subsystem IDs (Jesse) - remove IDs for non-existant DPIO_RX[23] subsystems (Jesse) Signed-off-by: Imre Deak Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 28 ++++++++++++++++++++++------ drivers/gpu/drm/i915/intel_uncore.c | 4 +++- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 04c00f39e94aa..b719385c45112 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -377,14 +377,30 @@ #define DSPFREQSTAT_MASK (0x3 << DSPFREQSTAT_SHIFT) #define DSPFREQGUAR_SHIFT 14 #define DSPFREQGUAR_MASK (0x3 << DSPFREQGUAR_SHIFT) + +/* See the PUNIT HAS v0.8 for the below bits */ +enum punit_power_well { + PUNIT_POWER_WELL_RENDER = 0, + PUNIT_POWER_WELL_MEDIA = 1, + PUNIT_POWER_WELL_DISP2D = 3, + PUNIT_POWER_WELL_DPIO_CMN_BC = 5, + PUNIT_POWER_WELL_DPIO_TX_B_LANES_01 = 6, + PUNIT_POWER_WELL_DPIO_TX_B_LANES_23 = 7, + PUNIT_POWER_WELL_DPIO_TX_C_LANES_01 = 8, + PUNIT_POWER_WELL_DPIO_TX_C_LANES_23 = 9, + PUNIT_POWER_WELL_DPIO_RX0 = 10, + PUNIT_POWER_WELL_DPIO_RX1 = 11, + + PUNIT_POWER_WELL_NUM, +}; + #define PUNIT_REG_PWRGT_CTRL 0x60 #define PUNIT_REG_PWRGT_STATUS 0x61 -#define PUNIT_CLK_GATE 1 -#define PUNIT_PWR_RESET 2 -#define PUNIT_PWR_GATE 3 -#define RENDER_PWRGT (PUNIT_PWR_GATE << 0) -#define MEDIA_PWRGT (PUNIT_PWR_GATE << 2) -#define DISP2D_PWRGT (PUNIT_PWR_GATE << 6) +#define PUNIT_PWRGT_MASK(power_well) (3 << ((power_well) * 2)) +#define PUNIT_PWRGT_PWR_ON(power_well) (0 << ((power_well) * 2)) +#define PUNIT_PWRGT_CLK_GATE(power_well) (1 << ((power_well) * 2)) +#define PUNIT_PWRGT_RESET(power_well) (2 << ((power_well) * 2)) +#define PUNIT_PWRGT_PWR_GATE(power_well) (3 << ((power_well) * 2)) #define PUNIT_REG_GPU_LFM 0xd3 #define PUNIT_REG_GPU_FREQ_REQ 0xd4 diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 00320fd10322d..7861d97600e18 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -361,7 +361,9 @@ void intel_uncore_sanitize(struct drm_device *dev) mutex_lock(&dev_priv->rps.hw_lock); reg_val = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS); - if (reg_val & (RENDER_PWRGT | MEDIA_PWRGT | DISP2D_PWRGT)) + if (reg_val & (PUNIT_PWRGT_PWR_GATE(PUNIT_POWER_WELL_RENDER) | + PUNIT_PWRGT_PWR_GATE(PUNIT_POWER_WELL_MEDIA) | + PUNIT_PWRGT_PWR_GATE(PUNIT_POWER_WELL_DISP2D))) vlv_punit_write(dev_priv, PUNIT_REG_PWRGT_CTRL, 0x0); mutex_unlock(&dev_priv->rps.hw_lock); From dd7c0b66e5414c54a9af8f100cc904240bab5102 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 4 Mar 2014 19:23:03 +0200 Subject: [PATCH 099/107] drm/i915: factor out reset_vblank_counter We need to do the same for other platforms in upcoming patches. v2: - s/p/pipe (Ville) - Call the new helper with the vbl_lock already held. The part it protects is short, so releasing it between pipes only makes proving correctness more difficult. Signed-off-by: Imre Deak Reviewed-by: Jesse Barnes [danvet: Resolve conflict with Damien's s/p/pipe/ change.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index a4c0ff181d07c..19591488fd2a4 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5310,6 +5310,13 @@ static void hsw_power_well_post_enable(struct drm_i915_private *dev_priv) } } +static void reset_vblank_counter(struct drm_device *dev, enum pipe pipe) +{ + assert_spin_locked(&dev->vbl_lock); + + dev->vblank[pipe].last = 0; +} + static void hsw_power_well_post_disable(struct drm_i915_private *dev_priv) { struct drm_device *dev = dev_priv->dev; @@ -5326,7 +5333,7 @@ static void hsw_power_well_post_disable(struct drm_i915_private *dev_priv) spin_lock_irqsave(&dev->vbl_lock, irqflags); for_each_pipe(pipe) if (pipe != PIPE_A) - dev->vblank[pipe].last = 0; + reset_vblank_counter(dev, pipe); spin_unlock_irqrestore(&dev->vbl_lock, irqflags); } From 25eaa003bd186e415d94bf0191152f1cd7252d9a Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 4 Mar 2014 19:23:06 +0200 Subject: [PATCH 100/107] drm/i915: sanity check power well sw state against hw state Suggested by Daniel. v2: - sanitize the state checking condition, the original was rather confusing (partly due to the unfortunate naming of i915.disable_power_well) (Ville) - simpler message+backtrace generation by using WARN instead of WARN_ON (Ville) - check if always-on power wells are truly on all the time Signed-off-by: Imre Deak Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 38 ++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 19591488fd2a4..5b039ca13927c 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5411,6 +5411,29 @@ static bool i9xx_always_on_power_well_enabled(struct drm_i915_private *dev_priv, return true; } +static void check_power_well_state(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ + bool enabled = power_well->ops->is_enabled(dev_priv, power_well); + + if (power_well->always_on || !i915.disable_power_well) { + if (!enabled) + goto mismatch; + + return; + } + + if (enabled != (power_well->count > 0)) + goto mismatch; + + return; + +mismatch: + WARN(1, "state mismatch for '%s' (always_on %d hw state %d use-count %d disable_power_well %d\n", + power_well->name, power_well->always_on, enabled, + power_well->count, i915.disable_power_well); +} + void intel_display_power_get(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain) { @@ -5422,9 +5445,14 @@ void intel_display_power_get(struct drm_i915_private *dev_priv, mutex_lock(&power_domains->lock); - for_each_power_well(i, power_well, BIT(domain), power_domains) - if (!power_well->count++) + for_each_power_well(i, power_well, BIT(domain), power_domains) { + if (!power_well->count++) { + DRM_DEBUG_KMS("enabling %s\n", power_well->name); power_well->ops->enable(dev_priv, power_well); + } + + check_power_well_state(dev_priv, power_well); + } power_domains->domain_use_count[domain]++; @@ -5448,8 +5476,12 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, for_each_power_well_rev(i, power_well, BIT(domain), power_domains) { WARN_ON(!power_well->count); - if (!--power_well->count && i915.disable_power_well) + if (!--power_well->count && i915.disable_power_well) { + DRM_DEBUG_KMS("disabling %s\n", power_well->name); power_well->ops->disable(dev_priv, power_well); + } + + check_power_well_state(dev_priv, power_well); } mutex_unlock(&power_domains->lock); From f8b79e58dc79f3f0edeb5194198a331374b11f82 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 4 Mar 2014 19:23:07 +0200 Subject: [PATCH 101/107] drm/i915: vlv: factor out valleyview_display_irq_install We'll need to disable/re-enable the display-side IRQs when turning off/on the VLV display power well. Factor out the helper functions for this. For now keep the display IRQs enabled by default, so the functionality doesn't change. This will be changed to enable/disable the IRQs on-demand when adding support for VLV power wells in an upcoming patch. v2: - take the irq spin lock for the whole enable/disable sequence as these can be called with interrupts enabled Signed-off-by: Imre Deak Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 1 + drivers/gpu/drm/i915/i915_drv.h | 5 ++ drivers/gpu/drm/i915/i915_irq.c | 116 +++++++++++++++++++++++++++----- 3 files changed, 106 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index e4d2b9f15ae29..65876c0e04996 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1672,6 +1672,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) goto out_mtrrfree; } + dev_priv->display_irqs_enabled = true; intel_irq_init(dev); intel_uncore_sanitize(dev); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9387c56647ba7..8702893a64f69 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1469,6 +1469,8 @@ typedef struct drm_i915_private { /* protects the irq masks */ spinlock_t irq_lock; + bool display_irqs_enabled; + /* To control wakeup latency, e.g. for irq-driven dp aux transfers. */ struct pm_qos_request pm_qos; @@ -2063,6 +2065,9 @@ void i915_disable_pipestat(drm_i915_private_t *dev_priv, enum pipe pipe, u32 status_mask); +void valleyview_enable_display_irqs(struct drm_i915_private *dev_priv); +void valleyview_disable_display_irqs(struct drm_i915_private *dev_priv); + /* i915_gem.c */ int i915_gem_init_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 4a19306b2d73b..d6f827ab8f01e 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3025,36 +3025,113 @@ static int ironlake_irq_postinstall(struct drm_device *dev) return 0; } +static void valleyview_display_irqs_install(struct drm_i915_private *dev_priv) +{ + u32 pipestat_mask; + u32 iir_mask; + + pipestat_mask = PIPESTAT_INT_STATUS_MASK | + PIPE_FIFO_UNDERRUN_STATUS; + + I915_WRITE(PIPESTAT(PIPE_A), pipestat_mask); + I915_WRITE(PIPESTAT(PIPE_B), pipestat_mask); + POSTING_READ(PIPESTAT(PIPE_A)); + + pipestat_mask = PLANE_FLIP_DONE_INT_STATUS_VLV | + PIPE_CRC_DONE_INTERRUPT_STATUS; + + i915_enable_pipestat(dev_priv, PIPE_A, pipestat_mask | + PIPE_GMBUS_INTERRUPT_STATUS); + i915_enable_pipestat(dev_priv, PIPE_B, pipestat_mask); + + iir_mask = I915_DISPLAY_PORT_INTERRUPT | + I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | + I915_DISPLAY_PIPE_B_EVENT_INTERRUPT; + dev_priv->irq_mask &= ~iir_mask; + + I915_WRITE(VLV_IIR, iir_mask); + I915_WRITE(VLV_IIR, iir_mask); + I915_WRITE(VLV_IMR, dev_priv->irq_mask); + I915_WRITE(VLV_IER, ~dev_priv->irq_mask); + POSTING_READ(VLV_IER); +} + +static void valleyview_display_irqs_uninstall(struct drm_i915_private *dev_priv) +{ + u32 pipestat_mask; + u32 iir_mask; + + iir_mask = I915_DISPLAY_PORT_INTERRUPT | + I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | + I915_DISPLAY_PIPE_A_EVENT_INTERRUPT; + + dev_priv->irq_mask |= iir_mask; + I915_WRITE(VLV_IER, ~dev_priv->irq_mask); + I915_WRITE(VLV_IMR, dev_priv->irq_mask); + I915_WRITE(VLV_IIR, iir_mask); + I915_WRITE(VLV_IIR, iir_mask); + POSTING_READ(VLV_IIR); + + pipestat_mask = PLANE_FLIP_DONE_INT_STATUS_VLV | + PIPE_CRC_DONE_INTERRUPT_STATUS; + + i915_disable_pipestat(dev_priv, PIPE_A, pipestat_mask | + PIPE_GMBUS_INTERRUPT_STATUS); + i915_disable_pipestat(dev_priv, PIPE_B, pipestat_mask); + + pipestat_mask = PIPESTAT_INT_STATUS_MASK | + PIPE_FIFO_UNDERRUN_STATUS; + I915_WRITE(PIPESTAT(PIPE_A), pipestat_mask); + I915_WRITE(PIPESTAT(PIPE_B), pipestat_mask); + POSTING_READ(PIPESTAT(PIPE_A)); +} + +void valleyview_enable_display_irqs(struct drm_i915_private *dev_priv) +{ + assert_spin_locked(&dev_priv->irq_lock); + + if (dev_priv->display_irqs_enabled) + return; + + dev_priv->display_irqs_enabled = true; + + if (dev_priv->dev->irq_enabled) + valleyview_display_irqs_install(dev_priv); +} + +void valleyview_disable_display_irqs(struct drm_i915_private *dev_priv) +{ + assert_spin_locked(&dev_priv->irq_lock); + + if (!dev_priv->display_irqs_enabled) + return; + + dev_priv->display_irqs_enabled = false; + + if (dev_priv->dev->irq_enabled) + valleyview_display_irqs_uninstall(dev_priv); +} + static int valleyview_irq_postinstall(struct drm_device *dev) { drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; - u32 enable_mask; - u32 pipestat_enable = PLANE_FLIP_DONE_INT_STATUS_VLV | - PIPE_CRC_DONE_INTERRUPT_STATUS; unsigned long irqflags; - enable_mask = I915_DISPLAY_PORT_INTERRUPT; - enable_mask |= I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | - I915_DISPLAY_PIPE_B_EVENT_INTERRUPT; - - dev_priv->irq_mask = ~enable_mask; + dev_priv->irq_mask = ~0; I915_WRITE(PORT_HOTPLUG_EN, 0); POSTING_READ(PORT_HOTPLUG_EN); I915_WRITE(VLV_IMR, dev_priv->irq_mask); - I915_WRITE(VLV_IER, enable_mask); + I915_WRITE(VLV_IER, ~dev_priv->irq_mask); I915_WRITE(VLV_IIR, 0xffffffff); - I915_WRITE(PIPESTAT(0), 0xffff); - I915_WRITE(PIPESTAT(1), 0xffff); POSTING_READ(VLV_IER); /* Interrupt setup is already guaranteed to be single-threaded, this is * just to make the assert_spin_locked check happy. */ spin_lock_irqsave(&dev_priv->irq_lock, irqflags); - i915_enable_pipestat(dev_priv, PIPE_A, pipestat_enable); - i915_enable_pipestat(dev_priv, PIPE_A, PIPE_GMBUS_INTERRUPT_STATUS); - i915_enable_pipestat(dev_priv, PIPE_B, pipestat_enable); + if (dev_priv->display_irqs_enabled) + valleyview_display_irqs_install(dev_priv); spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); I915_WRITE(VLV_IIR, 0xffffffff); @@ -3185,6 +3262,7 @@ static void gen8_irq_uninstall(struct drm_device *dev) static void valleyview_irq_uninstall(struct drm_device *dev) { drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; + unsigned long irqflags; int pipe; if (!dev_priv) @@ -3198,8 +3276,14 @@ static void valleyview_irq_uninstall(struct drm_device *dev) I915_WRITE(HWSTAM, 0xffffffff); I915_WRITE(PORT_HOTPLUG_EN, 0); I915_WRITE(PORT_HOTPLUG_STAT, I915_READ(PORT_HOTPLUG_STAT)); - for_each_pipe(pipe) - I915_WRITE(PIPESTAT(pipe), 0xffff); + + spin_lock_irqsave(&dev_priv->irq_lock, irqflags); + if (dev_priv->display_irqs_enabled) + valleyview_display_irqs_uninstall(dev_priv); + spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); + + dev_priv->irq_mask = 0; + I915_WRITE(VLV_IIR, 0xffffffff); I915_WRITE(VLV_IMR, 0xffffffff); I915_WRITE(VLV_IER, 0x0); From f88d42f1d0272c46390434607b0f5de3889d157d Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 4 Mar 2014 19:23:09 +0200 Subject: [PATCH 102/107] drm/i915: factor out intel_set_cpu_fifo_underrun_reporting_nolock Needed by the next patch, wanting to set the underrun reporting as part of a bigger dev_priv->irq_lock'ed sequence. Signed-off-by: Imre Deak Reviewed-by: Jesse Barnes [danvet: Use more customary __ prefix instead of _nolock postfix.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_irq.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index d6f827ab8f01e..f43dae9a9735f 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -387,17 +387,14 @@ static void cpt_set_fifo_underrun_reporting(struct drm_device *dev, * * Returns the previous state of underrun reporting. */ -bool intel_set_cpu_fifo_underrun_reporting(struct drm_device *dev, - enum pipe pipe, bool enable) +bool __intel_set_cpu_fifo_underrun_reporting(struct drm_device *dev, + enum pipe pipe, bool enable) { struct drm_i915_private *dev_priv = dev->dev_private; struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe]; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - unsigned long flags; bool ret; - spin_lock_irqsave(&dev_priv->irq_lock, flags); - ret = !intel_crtc->cpu_fifo_underrun_disabled; if (enable == ret) @@ -415,7 +412,20 @@ bool intel_set_cpu_fifo_underrun_reporting(struct drm_device *dev, broadwell_set_fifo_underrun_reporting(dev, pipe, enable); done: + return ret; +} + +bool intel_set_cpu_fifo_underrun_reporting(struct drm_device *dev, + enum pipe pipe, bool enable) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + unsigned long flags; + bool ret; + + spin_lock_irqsave(&dev_priv->irq_lock, flags); + ret = __intel_set_cpu_fifo_underrun_reporting(dev, pipe, enable); spin_unlock_irqrestore(&dev_priv->irq_lock, flags); + return ret; } From 77961eb984c7e5394bd29cc7be2ab0bf0cc7e7b1 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 5 Mar 2014 16:20:56 +0200 Subject: [PATCH 103/107] drm/i915: power domains: add vlv power wells Based on an early draft from Jesse. Add support for powering on/off the dynamic power wells on VLV by registering its display and dpio dynamic power wells with the power domain framework. For now power on all PHY TX lanes regardless of the actual lane configuration. Later this can be optimized when the PHY side setup enables only the required lanes. Atm, it enables all lanes in all cases. v2: - undef function local COND macro after its last use (Ville) - Take dev_priv->irq_lock around the whole sequence of intel_set_cpu_fifo_underrun_reporting_nolock() and valleyview_disable_display_irqs(). They are short and releasing the lock in between only makes proving correctness more difficult. - sanitize local var names in vlv_power_well_enabled() v3: - rebase on latest -nightly Signed-off-by: Imre Deak Reviewed-by: Jesse Barnes [danvet: Resolve conflict due to my changes in the previous patch. Also throw in an assert_spin_locked for safety. And finally appease checkpatch.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 1 - drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_irq.c | 2 + drivers/gpu/drm/i915/intel_display.c | 1 + drivers/gpu/drm/i915/intel_drv.h | 2 + drivers/gpu/drm/i915/intel_pm.c | 236 +++++++++++++++++++++++++++ 6 files changed, 242 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 65876c0e04996..e4d2b9f15ae29 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1672,7 +1672,6 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) goto out_mtrrfree; } - dev_priv->display_irqs_enabled = true; intel_irq_init(dev); intel_uncore_sanitize(dev); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8702893a64f69..75e5187ce89f9 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1062,7 +1062,7 @@ struct i915_power_well { /* power well enable/disable usage count */ int count; unsigned long domains; - void *data; + unsigned long data; const struct i915_power_well_ops *ops; }; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index f43dae9a9735f..55a0a1de3a1cd 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -395,6 +395,8 @@ bool __intel_set_cpu_fifo_underrun_reporting(struct drm_device *dev, struct intel_crtc *intel_crtc = to_intel_crtc(crtc); bool ret; + assert_spin_locked(&dev_priv->irq_lock); + ret = !intel_crtc->cpu_fifo_underrun_disabled; if (enable == ret) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 66184423ae291..56edc84098565 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4229,6 +4229,7 @@ static void valleyview_modeset_global_resources(struct drm_device *dev) if (req_cdclk != cur_cdclk) valleyview_set_cdclk(dev, req_cdclk); + modeset_update_crtc_power_domains(dev); } static void valleyview_crtc_enable(struct drm_crtc *crtc) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index e31eb1ec5b79e..9c7090590776b 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -609,6 +609,8 @@ hdmi_to_dig_port(struct intel_hdmi *intel_hdmi) /* i915_irq.c */ bool intel_set_cpu_fifo_underrun_reporting(struct drm_device *dev, enum pipe pipe, bool enable); +bool __intel_set_cpu_fifo_underrun_reporting(struct drm_device *dev, + enum pipe pipe, bool enable); bool intel_set_pch_fifo_underrun_reporting(struct drm_device *dev, enum transcoder pch_transcoder, bool enable); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 5b039ca13927c..030880a2cf4ef 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5411,6 +5411,140 @@ static bool i9xx_always_on_power_well_enabled(struct drm_i915_private *dev_priv, return true; } +static void vlv_set_power_well(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well, bool enable) +{ + enum punit_power_well power_well_id = power_well->data; + u32 mask; + u32 state; + u32 ctrl; + + mask = PUNIT_PWRGT_MASK(power_well_id); + state = enable ? PUNIT_PWRGT_PWR_ON(power_well_id) : + PUNIT_PWRGT_PWR_GATE(power_well_id); + + mutex_lock(&dev_priv->rps.hw_lock); + +#define COND \ + ((vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS) & mask) == state) + + if (COND) + goto out; + + ctrl = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_CTRL); + ctrl &= ~mask; + ctrl |= state; + vlv_punit_write(dev_priv, PUNIT_REG_PWRGT_CTRL, ctrl); + + if (wait_for(COND, 100)) + DRM_ERROR("timout setting power well state %08x (%08x)\n", + state, + vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_CTRL)); + +#undef COND + +out: + mutex_unlock(&dev_priv->rps.hw_lock); +} + +static void vlv_power_well_sync_hw(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ + vlv_set_power_well(dev_priv, power_well, power_well->count > 0); +} + +static void vlv_power_well_enable(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ + vlv_set_power_well(dev_priv, power_well, true); +} + +static void vlv_power_well_disable(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ + vlv_set_power_well(dev_priv, power_well, false); +} + +static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ + int power_well_id = power_well->data; + bool enabled = false; + u32 mask; + u32 state; + u32 ctrl; + + mask = PUNIT_PWRGT_MASK(power_well_id); + ctrl = PUNIT_PWRGT_PWR_ON(power_well_id); + + mutex_lock(&dev_priv->rps.hw_lock); + + state = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS) & mask; + /* + * We only ever set the power-on and power-gate states, anything + * else is unexpected. + */ + WARN_ON(state != PUNIT_PWRGT_PWR_ON(power_well_id) && + state != PUNIT_PWRGT_PWR_GATE(power_well_id)); + if (state == ctrl) + enabled = true; + + /* + * A transient state at this point would mean some unexpected party + * is poking at the power controls too. + */ + ctrl = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_CTRL) & mask; + WARN_ON(ctrl != state); + + mutex_unlock(&dev_priv->rps.hw_lock); + + return enabled; +} + +static void vlv_display_power_well_enable(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ + WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DISP2D); + + vlv_set_power_well(dev_priv, power_well, true); + + spin_lock_irq(&dev_priv->irq_lock); + valleyview_enable_display_irqs(dev_priv); + spin_unlock_irq(&dev_priv->irq_lock); + + /* + * During driver initialization we need to defer enabling hotplug + * processing until fbdev is set up. + */ + if (dev_priv->enable_hotplug_processing) + intel_hpd_init(dev_priv->dev); + + i915_redisable_vga_power_on(dev_priv->dev); +} + +static void vlv_display_power_well_disable(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ + struct drm_device *dev = dev_priv->dev; + enum pipe pipe; + + WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DISP2D); + + spin_lock_irq(&dev_priv->irq_lock); + for_each_pipe(pipe) + __intel_set_cpu_fifo_underrun_reporting(dev, pipe, false); + + valleyview_disable_display_irqs(dev_priv); + spin_unlock_irq(&dev_priv->irq_lock); + + spin_lock_irq(&dev->vbl_lock); + for_each_pipe(pipe) + reset_vblank_counter(dev, pipe); + spin_unlock_irq(&dev->vbl_lock); + + vlv_set_power_well(dev_priv, power_well, false); +} + static void check_power_well_state(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { @@ -5543,6 +5677,35 @@ EXPORT_SYMBOL_GPL(i915_release_power_well); (POWER_DOMAIN_MASK & ~BDW_ALWAYS_ON_POWER_DOMAINS) | \ BIT(POWER_DOMAIN_INIT)) +#define VLV_ALWAYS_ON_POWER_DOMAINS BIT(POWER_DOMAIN_INIT) +#define VLV_DISPLAY_POWER_DOMAINS POWER_DOMAIN_MASK + +#define VLV_DPIO_CMN_BC_POWER_DOMAINS ( \ + BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) | \ + BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) | \ + BIT(POWER_DOMAIN_PORT_DDI_C_2_LANES) | \ + BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) | \ + BIT(POWER_DOMAIN_PORT_CRT) | \ + BIT(POWER_DOMAIN_INIT)) + +#define VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS ( \ + BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) | \ + BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) | \ + BIT(POWER_DOMAIN_INIT)) + +#define VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS ( \ + BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) | \ + BIT(POWER_DOMAIN_INIT)) + +#define VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS ( \ + BIT(POWER_DOMAIN_PORT_DDI_C_2_LANES) | \ + BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) | \ + BIT(POWER_DOMAIN_INIT)) + +#define VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS ( \ + BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) | \ + BIT(POWER_DOMAIN_INIT)) + static const struct i915_power_well_ops i9xx_always_on_power_well_ops = { .sync_hw = i9xx_always_on_power_well_noop, .enable = i9xx_always_on_power_well_noop, @@ -5594,6 +5757,77 @@ static struct i915_power_well bdw_power_wells[] = { }, }; +static const struct i915_power_well_ops vlv_display_power_well_ops = { + .sync_hw = vlv_power_well_sync_hw, + .enable = vlv_display_power_well_enable, + .disable = vlv_display_power_well_disable, + .is_enabled = vlv_power_well_enabled, +}; + +static const struct i915_power_well_ops vlv_dpio_power_well_ops = { + .sync_hw = vlv_power_well_sync_hw, + .enable = vlv_power_well_enable, + .disable = vlv_power_well_disable, + .is_enabled = vlv_power_well_enabled, +}; + +static struct i915_power_well vlv_power_wells[] = { + { + .name = "always-on", + .always_on = 1, + .domains = VLV_ALWAYS_ON_POWER_DOMAINS, + .ops = &i9xx_always_on_power_well_ops, + }, + { + .name = "display", + .domains = VLV_DISPLAY_POWER_DOMAINS, + .data = PUNIT_POWER_WELL_DISP2D, + .ops = &vlv_display_power_well_ops, + }, + { + .name = "dpio-common", + .domains = VLV_DPIO_CMN_BC_POWER_DOMAINS, + .data = PUNIT_POWER_WELL_DPIO_CMN_BC, + .ops = &vlv_dpio_power_well_ops, + }, + { + .name = "dpio-tx-b-01", + .domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS | + VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS | + VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS | + VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS, + .ops = &vlv_dpio_power_well_ops, + .data = PUNIT_POWER_WELL_DPIO_TX_B_LANES_01, + }, + { + .name = "dpio-tx-b-23", + .domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS | + VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS | + VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS | + VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS, + .ops = &vlv_dpio_power_well_ops, + .data = PUNIT_POWER_WELL_DPIO_TX_B_LANES_23, + }, + { + .name = "dpio-tx-c-01", + .domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS | + VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS | + VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS | + VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS, + .ops = &vlv_dpio_power_well_ops, + .data = PUNIT_POWER_WELL_DPIO_TX_C_LANES_01, + }, + { + .name = "dpio-tx-c-23", + .domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS | + VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS | + VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS | + VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS, + .ops = &vlv_dpio_power_well_ops, + .data = PUNIT_POWER_WELL_DPIO_TX_C_LANES_23, + }, +}; + #define set_power_wells(power_domains, __power_wells) ({ \ (power_domains)->power_wells = (__power_wells); \ (power_domains)->power_well_count = ARRAY_SIZE(__power_wells); \ @@ -5615,6 +5849,8 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv) } else if (IS_BROADWELL(dev_priv->dev)) { set_power_wells(power_domains, bdw_power_wells); hsw_pwr = power_domains; + } else if (IS_VALLEYVIEW(dev_priv->dev)) { + set_power_wells(power_domains, vlv_power_wells); } else { set_power_wells(power_domains, i9xx_always_on_power_well); } From 922044c9dfec40d5adc5d4a757f802e55e3d0a85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 14 Feb 2014 14:18:57 +0200 Subject: [PATCH 104/107] drm/i915: Avoid div by zero when pixel clock is large MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure the line_time_us isn't zero in the gmch watermarks code as that would cause a div by zero. This can be triggered by specifying a very fast pixel clock for the mode. At some point we should probably just switch over to using the same math we use on PCH platforms which avoids such intermediate rounded results. Also we should verify the user provided mode much more rigorously. At the moment we accept pretty much anything. Note that "very fast mode" here means above 74.25 GHz. Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson [danvet: Add Ville's clarification of what "very fast" means.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 030880a2cf4ef..fc96e611a8a79 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1134,7 +1134,7 @@ static bool g4x_compute_wm0(struct drm_device *dev, *plane_wm = display->max_wm; /* Use the large buffer method to calculate cursor watermark */ - line_time_us = ((htotal * 1000) / clock); + line_time_us = max(htotal * 1000 / clock, 1); line_count = (cursor_latency_ns / line_time_us + 1000) / 1000; entries = line_count * 64 * pixel_size; tlb_miss = cursor->fifo_size*cursor->cacheline_size - hdisplay * 8; @@ -1210,7 +1210,7 @@ static bool g4x_compute_srwm(struct drm_device *dev, hdisplay = to_intel_crtc(crtc)->config.pipe_src_w; pixel_size = crtc->fb->bits_per_pixel / 8; - line_time_us = (htotal * 1000) / clock; + line_time_us = max(htotal * 1000 / clock, 1); line_count = (latency_ns / line_time_us + 1000) / 1000; line_size = hdisplay * pixel_size; @@ -1443,7 +1443,7 @@ static void i965_update_wm(struct drm_crtc *unused_crtc) unsigned long line_time_us; int entries; - line_time_us = ((htotal * 1000) / clock); + line_time_us = max(htotal * 1000 / clock, 1); /* Use ns/us then divide to preserve precision */ entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) * @@ -1569,7 +1569,7 @@ static void i9xx_update_wm(struct drm_crtc *unused_crtc) unsigned long line_time_us; int entries; - line_time_us = (htotal * 1000) / clock; + line_time_us = max(htotal * 1000 / clock, 1); /* Use ns/us then divide to preserve precision */ entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) * From 4c914c0c7c787b8f730128a8cdcca9c50b0784ab Mon Sep 17 00:00:00 2001 From: Brad Volkin Date: Tue, 18 Feb 2014 10:15:45 -0800 Subject: [PATCH 105/107] drm/i915: Refactor shmem pread setup The command parser is going to need the same synchronization and setup logic, so factor it out for reuse. v2: Add a check that the object is backed by shmem Signed-off-by: Brad Volkin Reviewed-by: Jani Nikula Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 3 ++ drivers/gpu/drm/i915/i915_gem.c | 51 ++++++++++++++++++++++++--------- 2 files changed, 40 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 75e5187ce89f9..ef386bf9cecd5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2140,6 +2140,9 @@ void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv); void i915_gem_release_mmap(struct drm_i915_gem_object *obj); void i915_gem_lastclose(struct drm_device *dev); +int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, + int *needs_clflush); + int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj); static inline struct page *i915_gem_object_get_page(struct drm_i915_gem_object *obj, int n) { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 18ea6bccbbf04..177c20722656e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -327,6 +327,42 @@ __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, return 0; } +/* + * Pins the specified object's pages and synchronizes the object with + * GPU accesses. Sets needs_clflush to non-zero if the caller should + * flush the object from the CPU cache. + */ +int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, + int *needs_clflush) +{ + int ret; + + *needs_clflush = 0; + + if (!obj->base.filp) + return -EINVAL; + + if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { + /* If we're not in the cpu read domain, set ourself into the gtt + * read domain and manually flush cachelines (if required). This + * optimizes for the case when the gpu will dirty the data + * anyway again before the next pread happens. */ + *needs_clflush = !cpu_cache_is_coherent(obj->base.dev, + obj->cache_level); + ret = i915_gem_object_wait_rendering(obj, true); + if (ret) + return ret; + } + + ret = i915_gem_object_get_pages(obj); + if (ret) + return ret; + + i915_gem_object_pin_pages(obj); + + return ret; +} + /* Per-page copy function for the shmem pread fastpath. * Flushes invalid cachelines before reading the target if * needs_clflush is set. */ @@ -424,23 +460,10 @@ i915_gem_shmem_pread(struct drm_device *dev, obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); - if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { - /* If we're not in the cpu read domain, set ourself into the gtt - * read domain and manually flush cachelines (if required). This - * optimizes for the case when the gpu will dirty the data - * anyway again before the next pread happens. */ - needs_clflush = !cpu_cache_is_coherent(dev, obj->cache_level); - ret = i915_gem_object_wait_rendering(obj, true); - if (ret) - return ret; - } - - ret = i915_gem_object_get_pages(obj); + ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); if (ret) return ret; - i915_gem_object_pin_pages(obj); - offset = args->offset; for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, From 351e3db2b3631556607d0d94fa26df8e2e0d0fd8 Mon Sep 17 00:00:00 2001 From: Brad Volkin Date: Tue, 18 Feb 2014 10:15:46 -0800 Subject: [PATCH 106/107] drm/i915: Implement command buffer parsing logic The command parser scans batch buffers submitted via execbuffer ioctls before the driver submits them to hardware. At a high level, it looks for several things: 1) Commands which are explicitly defined as privileged or which should only be used by the kernel driver. The parser generally rejects such commands, with the provision that it may allow some from the drm master process. 2) Commands which access registers. To support correct/enhanced userspace functionality, particularly certain OpenGL extensions, the parser provides a whitelist of registers which userspace may safely access (for both normal and drm master processes). 3) Commands which access privileged memory (i.e. GGTT, HWS page, etc). The parser always rejects such commands. See the overview comment in the source for more details. This patch only implements the logic. Subsequent patches will build the tables that drive the parser. v2: Don't set the secure bit if the parser succeeds Fail harder during init Makefile cleanup Kerneldoc cleanup Clarify module param description Convert ints to bools in a few places Move client/subclient defs to i915_reg.h Remove the bits_count field OTC-Tracker: AXIA-4631 Change-Id: I50b98c71c6655893291c78a2d1b8954577b37a30 Signed-off-by: Brad Volkin Reviewed-by: Jani Nikula [danvet: Appease checkpatch.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_cmd_parser.c | 485 +++++++++++++++++++++ drivers/gpu/drm/i915/i915_drv.h | 93 ++++ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 18 + drivers/gpu/drm/i915/i915_params.c | 5 + drivers/gpu/drm/i915/i915_reg.h | 12 + drivers/gpu/drm/i915/intel_ringbuffer.c | 2 + drivers/gpu/drm/i915/intel_ringbuffer.h | 32 ++ 8 files changed, 648 insertions(+) create mode 100644 drivers/gpu/drm/i915/i915_cmd_parser.c diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 4850494bd548f..3569122b1995c 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -14,6 +14,7 @@ i915-y := i915_drv.o i915_dma.o i915_irq.o \ i915_gem_gtt.o \ i915_gem_stolen.o \ i915_gem_tiling.o \ + i915_cmd_parser.o \ i915_params.o \ i915_sysfs.o \ i915_trace_points.o \ diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c new file mode 100644 index 0000000000000..7a5756e9bb864 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -0,0 +1,485 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Brad Volkin + * + */ + +#include "i915_drv.h" + +/** + * DOC: i915 batch buffer command parser + * + * Motivation: + * Certain OpenGL features (e.g. transform feedback, performance monitoring) + * require userspace code to submit batches containing commands such as + * MI_LOAD_REGISTER_IMM to access various registers. Unfortunately, some + * generations of the hardware will noop these commands in "unsecure" batches + * (which includes all userspace batches submitted via i915) even though the + * commands may be safe and represent the intended programming model of the + * device. + * + * The software command parser is similar in operation to the command parsing + * done in hardware for unsecure batches. However, the software parser allows + * some operations that would be noop'd by hardware, if the parser determines + * the operation is safe, and submits the batch as "secure" to prevent hardware + * parsing. + * + * Threats: + * At a high level, the hardware (and software) checks attempt to prevent + * granting userspace undue privileges. There are three categories of privilege. + * + * First, commands which are explicitly defined as privileged or which should + * only be used by the kernel driver. The parser generally rejects such + * commands, though it may allow some from the drm master process. + * + * Second, commands which access registers. To support correct/enhanced + * userspace functionality, particularly certain OpenGL extensions, the parser + * provides a whitelist of registers which userspace may safely access (for both + * normal and drm master processes). + * + * Third, commands which access privileged memory (i.e. GGTT, HWS page, etc). + * The parser always rejects such commands. + * + * The majority of the problematic commands fall in the MI_* range, with only a + * few specific commands on each ring (e.g. PIPE_CONTROL and MI_FLUSH_DW). + * + * Implementation: + * Each ring maintains tables of commands and registers which the parser uses in + * scanning batch buffers submitted to that ring. + * + * Since the set of commands that the parser must check for is significantly + * smaller than the number of commands supported, the parser tables contain only + * those commands required by the parser. This generally works because command + * opcode ranges have standard command length encodings. So for commands that + * the parser does not need to check, it can easily skip them. This is + * implementated via a per-ring length decoding vfunc. + * + * Unfortunately, there are a number of commands that do not follow the standard + * length encoding for their opcode range, primarily amongst the MI_* commands. + * To handle this, the parser provides a way to define explicit "skip" entries + * in the per-ring command tables. + * + * Other command table entries map fairly directly to high level categories + * mentioned above: rejected, master-only, register whitelist. The parser + * implements a number of checks, including the privileged memory checks, via a + * general bitmasking mechanism. + */ + +static u32 gen7_render_get_cmd_length_mask(u32 cmd_header) +{ + u32 client = (cmd_header & INSTR_CLIENT_MASK) >> INSTR_CLIENT_SHIFT; + u32 subclient = + (cmd_header & INSTR_SUBCLIENT_MASK) >> INSTR_SUBCLIENT_SHIFT; + + if (client == INSTR_MI_CLIENT) + return 0x3F; + else if (client == INSTR_RC_CLIENT) { + if (subclient == INSTR_MEDIA_SUBCLIENT) + return 0xFFFF; + else + return 0xFF; + } + + DRM_DEBUG_DRIVER("CMD: Abnormal rcs cmd length! 0x%08X\n", cmd_header); + return 0; +} + +static u32 gen7_bsd_get_cmd_length_mask(u32 cmd_header) +{ + u32 client = (cmd_header & INSTR_CLIENT_MASK) >> INSTR_CLIENT_SHIFT; + u32 subclient = + (cmd_header & INSTR_SUBCLIENT_MASK) >> INSTR_SUBCLIENT_SHIFT; + + if (client == INSTR_MI_CLIENT) + return 0x3F; + else if (client == INSTR_RC_CLIENT) { + if (subclient == INSTR_MEDIA_SUBCLIENT) + return 0xFFF; + else + return 0xFF; + } + + DRM_DEBUG_DRIVER("CMD: Abnormal bsd cmd length! 0x%08X\n", cmd_header); + return 0; +} + +static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header) +{ + u32 client = (cmd_header & INSTR_CLIENT_MASK) >> INSTR_CLIENT_SHIFT; + + if (client == INSTR_MI_CLIENT) + return 0x3F; + else if (client == INSTR_BC_CLIENT) + return 0xFF; + + DRM_DEBUG_DRIVER("CMD: Abnormal blt cmd length! 0x%08X\n", cmd_header); + return 0; +} + +static void validate_cmds_sorted(struct intel_ring_buffer *ring) +{ + int i; + + if (!ring->cmd_tables || ring->cmd_table_count == 0) + return; + + for (i = 0; i < ring->cmd_table_count; i++) { + const struct drm_i915_cmd_table *table = &ring->cmd_tables[i]; + u32 previous = 0; + int j; + + for (j = 0; j < table->count; j++) { + const struct drm_i915_cmd_descriptor *desc = + &table->table[i]; + u32 curr = desc->cmd.value & desc->cmd.mask; + + if (curr < previous) + DRM_ERROR("CMD: table not sorted ring=%d table=%d entry=%d cmd=0x%08X prev=0x%08X\n", + ring->id, i, j, curr, previous); + + previous = curr; + } + } +} + +static void check_sorted(int ring_id, const u32 *reg_table, int reg_count) +{ + int i; + u32 previous = 0; + + for (i = 0; i < reg_count; i++) { + u32 curr = reg_table[i]; + + if (curr < previous) + DRM_ERROR("CMD: table not sorted ring=%d entry=%d reg=0x%08X prev=0x%08X\n", + ring_id, i, curr, previous); + + previous = curr; + } +} + +static void validate_regs_sorted(struct intel_ring_buffer *ring) +{ + check_sorted(ring->id, ring->reg_table, ring->reg_count); + check_sorted(ring->id, ring->master_reg_table, ring->master_reg_count); +} + +/** + * i915_cmd_parser_init_ring() - set cmd parser related fields for a ringbuffer + * @ring: the ringbuffer to initialize + * + * Optionally initializes fields related to batch buffer command parsing in the + * struct intel_ring_buffer based on whether the platform requires software + * command parsing. + */ +void i915_cmd_parser_init_ring(struct intel_ring_buffer *ring) +{ + if (!IS_GEN7(ring->dev)) + return; + + switch (ring->id) { + case RCS: + ring->get_cmd_length_mask = gen7_render_get_cmd_length_mask; + break; + case VCS: + ring->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; + break; + case BCS: + ring->get_cmd_length_mask = gen7_blt_get_cmd_length_mask; + break; + case VECS: + /* VECS can use the same length_mask function as VCS */ + ring->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; + break; + default: + DRM_ERROR("CMD: cmd_parser_init with unknown ring: %d\n", + ring->id); + BUG(); + } + + validate_cmds_sorted(ring); + validate_regs_sorted(ring); +} + +static const struct drm_i915_cmd_descriptor* +find_cmd_in_table(const struct drm_i915_cmd_table *table, + u32 cmd_header) +{ + int i; + + for (i = 0; i < table->count; i++) { + const struct drm_i915_cmd_descriptor *desc = &table->table[i]; + u32 masked_cmd = desc->cmd.mask & cmd_header; + u32 masked_value = desc->cmd.value & desc->cmd.mask; + + if (masked_cmd == masked_value) + return desc; + } + + return NULL; +} + +/* + * Returns a pointer to a descriptor for the command specified by cmd_header. + * + * The caller must supply space for a default descriptor via the default_desc + * parameter. If no descriptor for the specified command exists in the ring's + * command parser tables, this function fills in default_desc based on the + * ring's default length encoding and returns default_desc. + */ +static const struct drm_i915_cmd_descriptor* +find_cmd(struct intel_ring_buffer *ring, + u32 cmd_header, + struct drm_i915_cmd_descriptor *default_desc) +{ + u32 mask; + int i; + + for (i = 0; i < ring->cmd_table_count; i++) { + const struct drm_i915_cmd_descriptor *desc; + + desc = find_cmd_in_table(&ring->cmd_tables[i], cmd_header); + if (desc) + return desc; + } + + mask = ring->get_cmd_length_mask(cmd_header); + if (!mask) + return NULL; + + BUG_ON(!default_desc); + default_desc->flags = CMD_DESC_SKIP; + default_desc->length.mask = mask; + + return default_desc; +} + +static bool valid_reg(const u32 *table, int count, u32 addr) +{ + if (table && count != 0) { + int i; + + for (i = 0; i < count; i++) { + if (table[i] == addr) + return true; + } + } + + return false; +} + +static u32 *vmap_batch(struct drm_i915_gem_object *obj) +{ + int i; + void *addr = NULL; + struct sg_page_iter sg_iter; + struct page **pages; + + pages = drm_malloc_ab(obj->base.size >> PAGE_SHIFT, sizeof(*pages)); + if (pages == NULL) { + DRM_DEBUG_DRIVER("Failed to get space for pages\n"); + goto finish; + } + + i = 0; + for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) { + pages[i] = sg_page_iter_page(&sg_iter); + i++; + } + + addr = vmap(pages, i, 0, PAGE_KERNEL); + if (addr == NULL) { + DRM_DEBUG_DRIVER("Failed to vmap pages\n"); + goto finish; + } + +finish: + if (pages) + drm_free_large(pages); + return (u32*)addr; +} + +/** + * i915_needs_cmd_parser() - should a given ring use software command parsing? + * @ring: the ring in question + * + * Only certain platforms require software batch buffer command parsing, and + * only when enabled via module paramter. + * + * Return: true if the ring requires software command parsing + */ +bool i915_needs_cmd_parser(struct intel_ring_buffer *ring) +{ + /* No command tables indicates a platform without parsing */ + if (!ring->cmd_tables) + return false; + + return (i915.enable_cmd_parser == 1); +} + +#define LENGTH_BIAS 2 + +/** + * i915_parse_cmds() - parse a submitted batch buffer for privilege violations + * @ring: the ring on which the batch is to execute + * @batch_obj: the batch buffer in question + * @batch_start_offset: byte offset in the batch at which execution starts + * @is_master: is the submitting process the drm master? + * + * Parses the specified batch buffer looking for privilege violations as + * described in the overview. + * + * Return: non-zero if the parser finds violations or otherwise fails + */ +int i915_parse_cmds(struct intel_ring_buffer *ring, + struct drm_i915_gem_object *batch_obj, + u32 batch_start_offset, + bool is_master) +{ + int ret = 0; + u32 *cmd, *batch_base, *batch_end; + struct drm_i915_cmd_descriptor default_desc = { 0 }; + int needs_clflush = 0; + + ret = i915_gem_obj_prepare_shmem_read(batch_obj, &needs_clflush); + if (ret) { + DRM_DEBUG_DRIVER("CMD: failed to prep read\n"); + return ret; + } + + batch_base = vmap_batch(batch_obj); + if (!batch_base) { + DRM_DEBUG_DRIVER("CMD: Failed to vmap batch\n"); + i915_gem_object_unpin_pages(batch_obj); + return -ENOMEM; + } + + if (needs_clflush) + drm_clflush_virt_range((char *)batch_base, batch_obj->base.size); + + cmd = batch_base + (batch_start_offset / sizeof(*cmd)); + batch_end = cmd + (batch_obj->base.size / sizeof(*batch_end)); + + while (cmd < batch_end) { + const struct drm_i915_cmd_descriptor *desc; + u32 length; + + if (*cmd == MI_BATCH_BUFFER_END) + break; + + desc = find_cmd(ring, *cmd, &default_desc); + if (!desc) { + DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n", + *cmd); + ret = -EINVAL; + break; + } + + if (desc->flags & CMD_DESC_FIXED) + length = desc->length.fixed; + else + length = ((*cmd & desc->length.mask) + LENGTH_BIAS); + + if ((batch_end - cmd) < length) { + DRM_DEBUG_DRIVER("CMD: Command length exceeds batch length: 0x%08X length=%d batchlen=%ld\n", + *cmd, + length, + batch_end - cmd); + ret = -EINVAL; + break; + } + + if (desc->flags & CMD_DESC_REJECT) { + DRM_DEBUG_DRIVER("CMD: Rejected command: 0x%08X\n", *cmd); + ret = -EINVAL; + break; + } + + if ((desc->flags & CMD_DESC_MASTER) && !is_master) { + DRM_DEBUG_DRIVER("CMD: Rejected master-only command: 0x%08X\n", + *cmd); + ret = -EINVAL; + break; + } + + if (desc->flags & CMD_DESC_REGISTER) { + u32 reg_addr = cmd[desc->reg.offset] & desc->reg.mask; + + if (!valid_reg(ring->reg_table, + ring->reg_count, reg_addr)) { + if (!is_master || + !valid_reg(ring->master_reg_table, + ring->master_reg_count, + reg_addr)) { + DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (ring=%d)\n", + reg_addr, + *cmd, + ring->id); + ret = -EINVAL; + break; + } + } + } + + if (desc->flags & CMD_DESC_BITMASK) { + int i; + + for (i = 0; i < MAX_CMD_DESC_BITMASKS; i++) { + u32 dword; + + if (desc->bits[i].mask == 0) + break; + + dword = cmd[desc->bits[i].offset] & + desc->bits[i].mask; + + if (dword != desc->bits[i].expected) { + DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (ring=%d)\n", + *cmd, + desc->bits[i].mask, + desc->bits[i].expected, + dword, ring->id); + ret = -EINVAL; + break; + } + } + + if (ret) + break; + } + + cmd += length; + } + + if (cmd >= batch_end) { + DRM_DEBUG_DRIVER("CMD: Got to the end of the buffer w/o a BBE cmd!\n"); + ret = -EINVAL; + } + + vunmap(batch_base); + + i915_gem_object_unpin_pages(batch_obj); + + return ret; +} diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ef386bf9cecd5..7b6dfdfb2d8d1 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1849,6 +1849,90 @@ struct drm_i915_file_private { atomic_t rps_wait_boost; }; +/* + * A command that requires special handling by the command parser. + */ +struct drm_i915_cmd_descriptor { + /* + * Flags describing how the command parser processes the command. + * + * CMD_DESC_FIXED: The command has a fixed length if this is set, + * a length mask if not set + * CMD_DESC_SKIP: The command is allowed but does not follow the + * standard length encoding for the opcode range in + * which it falls + * CMD_DESC_REJECT: The command is never allowed + * CMD_DESC_REGISTER: The command should be checked against the + * register whitelist for the appropriate ring + * CMD_DESC_MASTER: The command is allowed if the submitting process + * is the DRM master + */ + u32 flags; +#define CMD_DESC_FIXED (1<<0) +#define CMD_DESC_SKIP (1<<1) +#define CMD_DESC_REJECT (1<<2) +#define CMD_DESC_REGISTER (1<<3) +#define CMD_DESC_BITMASK (1<<4) +#define CMD_DESC_MASTER (1<<5) + + /* + * The command's unique identification bits and the bitmask to get them. + * This isn't strictly the opcode field as defined in the spec and may + * also include type, subtype, and/or subop fields. + */ + struct { + u32 value; + u32 mask; + } cmd; + + /* + * The command's length. The command is either fixed length (i.e. does + * not include a length field) or has a length field mask. The flag + * CMD_DESC_FIXED indicates a fixed length. Otherwise, the command has + * a length mask. All command entries in a command table must include + * length information. + */ + union { + u32 fixed; + u32 mask; + } length; + + /* + * Describes where to find a register address in the command to check + * against the ring's register whitelist. Only valid if flags has the + * CMD_DESC_REGISTER bit set. + */ + struct { + u32 offset; + u32 mask; + } reg; + +#define MAX_CMD_DESC_BITMASKS 3 + /* + * Describes command checks where a particular dword is masked and + * compared against an expected value. If the command does not match + * the expected value, the parser rejects it. Only valid if flags has + * the CMD_DESC_BITMASK bit set. Only entries where mask is non-zero + * are valid. + */ + struct { + u32 offset; + u32 mask; + u32 expected; + } bits[MAX_CMD_DESC_BITMASKS]; +}; + +/* + * A table of commands requiring special handling by the command parser. + * + * Each ring has an array of tables. Each table consists of an array of command + * descriptors, which must be sorted with command opcodes in ascending order. + */ +struct drm_i915_cmd_table { + const struct drm_i915_cmd_descriptor *table; + int count; +}; + #define INTEL_INFO(dev) (&to_i915(dev)->info) #define IS_I830(dev) ((dev)->pdev->device == 0x3577) @@ -2003,6 +2087,7 @@ struct i915_params { int enable_pc8; int pc8_timeout; int invert_brightness; + int enable_cmd_parser; /* leave bools at the end to not create holes */ bool enable_hangcheck; bool fastboot; @@ -2480,6 +2565,14 @@ void i915_destroy_error_state(struct drm_device *dev); void i915_get_extra_instdone(struct drm_device *dev, uint32_t *instdone); const char *i915_cache_level_str(int type); +/* i915_cmd_parser.c */ +void i915_cmd_parser_init_ring(struct intel_ring_buffer *ring); +bool i915_needs_cmd_parser(struct intel_ring_buffer *ring); +int i915_parse_cmds(struct intel_ring_buffer *ring, + struct drm_i915_gem_object *batch_obj, + u32 batch_start_offset, + bool is_master); + /* i915_suspend.c */ extern int i915_save_state(struct drm_device *dev); extern int i915_restore_state(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index d7229ad2bd221..3851a1b1dc88d 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1182,6 +1182,24 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, } batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND; + if (i915_needs_cmd_parser(ring)) { + ret = i915_parse_cmds(ring, + batch_obj, + args->batch_start_offset, + file->is_master); + if (ret) + goto err; + + /* + * XXX: Actually do this when enabling batch copy... + * + * Set the DISPATCH_SECURE bit to remove the NON_SECURE bit + * from MI_BATCH_BUFFER_START commands issued in the + * dispatch_execbuffer implementations. We specifically don't + * want that set when the command parser is enabled. + */ + } + /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure * batch" bit. Hence we need to pin secure batches into the global gtt. * hsw should have this fixed, but bdw mucks it up again. */ diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 3b482585c5ae9..a66ffb652bee9 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -48,6 +48,7 @@ struct i915_params i915 __read_mostly = { .reset = true, .invert_brightness = 0, .disable_display = 0, + .enable_cmd_parser = 0, }; module_param_named(modeset, i915.modeset, int, 0400); @@ -157,3 +158,7 @@ MODULE_PARM_DESC(invert_brightness, module_param_named(disable_display, i915.disable_display, bool, 0600); MODULE_PARM_DESC(disable_display, "Disable display (default: false)"); + +module_param_named(enable_cmd_parser, i915.enable_cmd_parser, int, 0600); +MODULE_PARM_DESC(enable_cmd_parser, + "Enable command parsing (1=enabled, 0=disabled [default])"); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index b719385c45112..146609ab42bb9 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -174,6 +174,18 @@ #define VGA_CR_INDEX_CGA 0x3d4 #define VGA_CR_DATA_CGA 0x3d5 +/* + * Instruction field definitions used by the command parser + */ +#define INSTR_CLIENT_SHIFT 29 +#define INSTR_CLIENT_MASK 0xE0000000 +#define INSTR_MI_CLIENT 0x0 +#define INSTR_BC_CLIENT 0x2 +#define INSTR_RC_CLIENT 0x3 +#define INSTR_SUBCLIENT_SHIFT 27 +#define INSTR_SUBCLIENT_MASK 0x18000000 +#define INSTR_MEDIA_SUBCLIENT 0x2 + /* * Memory interface instructions used by the kernel */ diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index b2d4f4852c986..5629cc7a77321 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1388,6 +1388,8 @@ static int intel_init_ring_buffer(struct drm_device *dev, if (IS_I830(ring->dev) || IS_845G(ring->dev)) ring->effective_size -= 128; + i915_cmd_parser_init_ring(ring); + return 0; err_unmap: diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 38c757e136dc9..9d4c3b1182e9e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -164,6 +164,38 @@ struct intel_ring_buffer { u32 gtt_offset; volatile u32 *cpu_page; } scratch; + + /* + * Tables of commands the command parser needs to know about + * for this ring. + */ + const struct drm_i915_cmd_table *cmd_tables; + int cmd_table_count; + + /* + * Table of registers allowed in commands that read/write registers. + */ + const u32 *reg_table; + int reg_count; + + /* + * Table of registers allowed in commands that read/write registers, but + * only from the DRM master. + */ + const u32 *master_reg_table; + int master_reg_count; + + /* + * Returns the bitmask for the length field of the specified command. + * Return 0 for an unrecognized/invalid command. + * + * If the command parser finds an entry for a command in the ring's + * cmd_tables, it gets the command's length based on the table entry. + * If not, it calls this function to determine the per-ring length field + * encoding for the command (i.e. certain opcode ranges use certain bits + * to encode the command length in the header). + */ + u32 (*get_cmd_length_mask)(u32 cmd_header); }; static inline bool From 2fae6a860ca9adb0c881f6dcd633df775c2520e9 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 7 Mar 2014 09:17:21 +0100 Subject: [PATCH 107/107] drm/i915: Go OCD on the Makefile Chris suggested to split things up a bit into the different parts of the driver and also sort it all correctly, with the hope that we're trying to organize things a bit better eventually. It should also help newcomers to orient themselves a bit better. v2: - Move intel_pm.c to the core - to make things perfect we should split out the modeset related pm features (psr/fbc) into a separate file. Maybe something Rodrigo can do once the PSR patches have settled. - Split the modesetting sections into core and encoders/outputs. intel_ddi.c is a bit funky since it has core hsw+ support and ddi output support. Whatever. v3: Failed to git add ... v4: Really go ocd, i.e. spelling fix in a comment from Jani. Cc: Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/Makefile | 84 ++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 3569122b1995c..077e81b1065dd 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -3,60 +3,70 @@ # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. ccflags-y := -Iinclude/drm -i915-y := i915_drv.o i915_dma.o i915_irq.o \ - i915_gpu_error.o \ + +# Please keep these build lists sorted! + +# core driver code +i915-y := i915_drv.o \ + i915_params.o \ i915_suspend.o \ - i915_gem.o \ + i915_sysfs.o \ + intel_pm.o +i915-$(CONFIG_COMPAT) += i915_ioc32.o +i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o + +# GEM code +i915-y += i915_cmd_parser.o \ i915_gem_context.o \ i915_gem_debug.o \ + i915_gem_dmabuf.o \ i915_gem_evict.o \ i915_gem_execbuffer.o \ i915_gem_gtt.o \ + i915_gem.o \ i915_gem_stolen.o \ i915_gem_tiling.o \ - i915_cmd_parser.o \ - i915_params.o \ - i915_sysfs.o \ + i915_gpu_error.o \ + i915_irq.o \ i915_trace_points.o \ - i915_ums.o \ + intel_ringbuffer.o \ + intel_uncore.o + +# modesetting core code +i915-y += intel_bios.o \ intel_display.o \ - intel_crt.o \ - intel_lvds.o \ - intel_dsi.o \ - intel_dsi_cmd.o \ - intel_dsi_pll.o \ - intel_bios.o \ - intel_ddi.o \ - intel_dp.o \ - intel_hdmi.o \ - intel_sdvo.o \ intel_modes.o \ - intel_panel.o \ - intel_pm.o \ - intel_i2c.o \ - intel_tv.o \ - intel_dvo.o \ - intel_ringbuffer.o \ - intel_overlay.o \ - intel_sprite.o \ intel_opregion.o \ + intel_overlay.o \ intel_sideband.o \ - intel_uncore.o \ + intel_sprite.o +i915-$(CONFIG_ACPI) += intel_acpi.o +i915-$(CONFIG_DRM_I915_FBDEV) += intel_fbdev.o + +# modesetting output/encoder code +i915-y += dvo_ch7017.o \ dvo_ch7xxx.o \ - dvo_ch7017.o \ dvo_ivch.o \ - dvo_tfp410.o \ - dvo_sil164.o \ dvo_ns2501.o \ - i915_gem_dmabuf.o - -i915-$(CONFIG_COMPAT) += i915_ioc32.o - -i915-$(CONFIG_ACPI) += intel_acpi.o - -i915-$(CONFIG_DRM_I915_FBDEV) += intel_fbdev.o + dvo_sil164.o \ + dvo_tfp410.o \ + intel_crt.o \ + intel_ddi.o \ + intel_dp.o \ + intel_dsi_cmd.o \ + intel_dsi.o \ + intel_dsi_pll.o \ + intel_dvo.o \ + intel_hdmi.o \ + intel_i2c.o \ + intel_lvds.o \ + intel_panel.o \ + intel_sdvo.o \ + intel_tv.o -i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o +# legacy horrors +i915-y += i915_dma.o \ + i915_ums.o obj-$(CONFIG_DRM_I915) += i915.o