From 97f9ca383dca6f4b425fb3c4709405fb8272a15f Mon Sep 17 00:00:00 2001
From: Colin Xu <colin.xu@intel.com>
Date: Fri, 11 Sep 2020 14:52:39 +0800
Subject: [PATCH 1/4] drm/i915/gvt: Allow zero out HWSP addr on hws_pga_write

Guest driver may reset HWSP to 0 as init value during D3->D0:
The full sequence is:
 - Boot ->D0
 - Update HWSP
 - D0->D3
 - ...In D3 state...
 - D3->D0
 - DMLR reset.
 - Set engine HWSP to 0.
 - Set engine ring mode to 0.
 - Set engine HWSP to correct value.
 - Set engine ring mode to correct value.
Ring mode is a masked register, so setting it to 0 won't take effect.
However, HWSP addr 0 is considered an invalid GGTT address, which will
report an error like:
       gvt: vgpu 1: write invalid HWSP address, reg:0x2080, value:0x0
       gvt: vgpu 1: fail to emulate MMIO write 00002080 len 4
       Detected your guest driver doesn't support GVT-g.
       Now vgpu 2 will enter failsafe mode.

Zeroing out the HWSP addr is a valid setting from the device driver,
so don't treat it as an invalid HWSP addr.

V2:
Treat HWSP addr 0 as valid. (zhenyu)

V3:
Change patch title.

Reviewed-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Colin Xu <colin.xu@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20200911065239.147789-1-colin.xu@intel.com
---
 drivers/gpu/drm/i915/gvt/handlers.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 05f3bc98d242d..388982fe3e02c 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -1489,7 +1489,8 @@ static int hws_pga_write(struct intel_vgpu *vgpu, unsigned int offset,
 	const struct intel_engine_cs *engine =
 		intel_gvt_render_mmio_to_engine(vgpu->gvt, offset);
 
-	if (!intel_gvt_ggtt_validate_range(vgpu, value, I915_GTT_PAGE_SIZE)) {
+	if (value != 0 &&
+	    !intel_gvt_ggtt_validate_range(vgpu, value, I915_GTT_PAGE_SIZE)) {
 		gvt_vgpu_err("write invalid HWSP address, reg:0x%x, value:0x%x\n",
 			      offset, value);
 		return -EINVAL;

From 8fe105679765700378eb328495fcfe1566cdbbd0 Mon Sep 17 00:00:00 2001
From: Colin Xu <colin.xu@intel.com>
Date: Mon, 12 Oct 2020 12:52:31 +0800
Subject: [PATCH 2/4] drm/i915/gvt: Set SNOOP for PAT3 on BXT/APL to workaround
 GPU BB hang

If the guest fills a non-priv bb on ApolloLake/Broxton as Mesa i965 does in:
717e7539124d (i965: Use a WC map and memcpy for the batch instead of pwrite.)
then, due to the missing flush of the bb filled by the VM vCPU, the host GPU
hangs on executing these MI_BATCH_BUFFER.

Temporarily workaround this by setting SNOOP bit for PAT3 used by PPGTT
PML4 PTE: PAT(0) PCD(1) PWT(1).

The performance is still expected to be low, will need further improvement.

Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Colin Xu <colin.xu@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20201012045231.226748-1-colin.xu@intel.com
---
 drivers/gpu/drm/i915/gvt/handlers.c | 32 ++++++++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 388982fe3e02c..beafc5e435b47 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -1651,6 +1651,34 @@ static int edp_psr_imr_iir_write(struct intel_vgpu *vgpu,
 	return 0;
 }
 
+/*
+ * FIXME: workaround for a host GPU hang on ApolloLake/Broxton.
+ * If the guest fills a non-priv batch buffer as Mesa i965 does since
+ * 717e7539124d (i965: Use a WC map and memcpy for the batch instead of pwrite.)
+ * the bb filled by the VM vCPU is not flushed, and the host GPU hangs on
+ * executing these MI_BATCH_BUFFER.
+ * Temporarily work around this by setting the SNOOP bit for PAT3 used by PPGTT
+ * PML4 PTE: PAT(0) PCD(1) PWT(1). The guest-written value (p_data) is ignored.
+ * The performance is still expected to be low, will need further improvement.
+ */
+static int bxt_ppat_low_write(struct intel_vgpu *vgpu, unsigned int offset,
+			      void *p_data, unsigned int bytes)
+{
+	u64 pat =
+		GEN8_PPAT(0, CHV_PPAT_SNOOP) |
+		GEN8_PPAT(1, 0) |
+		GEN8_PPAT(2, 0) |
+		GEN8_PPAT(3, CHV_PPAT_SNOOP) |
+		GEN8_PPAT(4, CHV_PPAT_SNOOP) |
+		GEN8_PPAT(5, CHV_PPAT_SNOOP) |
+		GEN8_PPAT(6, CHV_PPAT_SNOOP) |
+		GEN8_PPAT(7, CHV_PPAT_SNOOP);
+
+	vgpu_vreg(vgpu, offset) = lower_32_bits(pat);
+
+	return 0;
+}
+
 static int guc_status_read(struct intel_vgpu *vgpu,
 			   unsigned int offset, void *p_data,
 			   unsigned int bytes)
@@ -2812,7 +2840,7 @@ static int init_bdw_mmio_info(struct intel_gvt *gvt)
 
 	MMIO_DH(GEN6_PCODE_MAILBOX, D_BDW_PLUS, NULL, mailbox_write);
 
-	MMIO_D(GEN8_PRIVATE_PAT_LO, D_BDW_PLUS);
+	MMIO_D(GEN8_PRIVATE_PAT_LO, D_BDW_PLUS & ~D_BXT);
 	MMIO_D(GEN8_PRIVATE_PAT_HI, D_BDW_PLUS);
 
 	MMIO_D(GAMTARBMODE, D_BDW_PLUS);
@@ -3316,6 +3344,8 @@ static int init_bxt_mmio_info(struct intel_gvt *gvt)
 
 	MMIO_DFH(GEN9_CTX_PREEMPT_REG, D_BXT, F_CMD_ACCESS, NULL, NULL);
 
+	MMIO_DH(GEN8_PRIVATE_PAT_LO, D_BXT, NULL, bxt_ppat_low_write);
+
 	return 0;
 }
 

From baec997285e63ad3e03d8b8d45e14776cd737f62 Mon Sep 17 00:00:00 2001
From: Colin Xu <colin.xu@intel.com>
Date: Fri, 16 Oct 2020 13:40:59 +0800
Subject: [PATCH 3/4] drm/i915/gvt: Only pin/unpin intel_context along with
 workload

An issue was exposed by the commit below: the system freezes at suspend
after a vGPU is created (no need to activate the vGPU).
commit e6ba76480299 ("drm/i915: Remove i915->kernel_context")

The old implementation pins the intel_context at setup_submission and
unpins it at clean_submission. So after some vGPU is created, the
intel_context is always pinned there even though no workload is using it.
This then blocks i915 from entering the suspend state.

There is no need to pin it all the time. Pinning/unpinning it around the
workload lifecycle is more reasonable. After GVT enabled suspend/resume, the
pinned intel_context will also get unpinned when userspace puts the VM process
into suspend state, since all workloads are retired; it is then safe to
unpin all intel_contexts for the workloads created. So move the pin/unpin to
create_workload and destroy_workload, while still keeping the
create/destroy in the old place.

V2:
Rebase.

Fixes: e6ba76480299 ("drm/i915: Remove i915->kernel_context")
Reviewed-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Colin Xu <colin.xu@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20201016054059.238371-1-colin.xu@intel.com
---
 drivers/gpu/drm/i915/gvt/scheduler.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 3c3b9842bbbdc..68b2d10108fd1 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -1268,7 +1268,7 @@ void intel_vgpu_clean_submission(struct intel_vgpu *vgpu)
 
 	i915_context_ppgtt_root_restore(s, i915_vm_to_ppgtt(s->shadow[0]->vm));
 	for_each_engine(engine, vgpu->gvt->gt, id)
-		intel_context_unpin(s->shadow[id]);
+		intel_context_put(s->shadow[id]);
 
 	kmem_cache_destroy(s->workloads);
 }
@@ -1360,11 +1360,6 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
 			ce->ring = __intel_context_ring_size(ring_size);
 		}
 
-		ret = intel_context_pin(ce);
-		intel_context_put(ce);
-		if (ret)
-			goto out_shadow_ctx;
-
 		s->shadow[i] = ce;
 	}
 
@@ -1396,7 +1391,6 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
 		if (IS_ERR(s->shadow[i]))
 			break;
 
-		intel_context_unpin(s->shadow[i]);
 		intel_context_put(s->shadow[i]);
 	}
 	i915_vm_put(&ppgtt->vm);
@@ -1470,6 +1464,7 @@ void intel_vgpu_destroy_workload(struct intel_vgpu_workload *workload)
 {
 	struct intel_vgpu_submission *s = &workload->vgpu->submission;
 
+	intel_context_unpin(s->shadow[workload->engine->id]);
 	release_shadow_batch_buffer(workload);
 	release_shadow_wa_ctx(&workload->wa_ctx);
 
@@ -1715,6 +1710,12 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu,
 		return ERR_PTR(ret);
 	}
 
+	ret = intel_context_pin(s->shadow[engine->id]);
+	if (ret) {
+		intel_vgpu_destroy_workload(workload);
+		return ERR_PTR(ret);
+	}
+
 	return workload;
 }
 

From 92010a97098c4c9fd777408cc98064d26b32695b Mon Sep 17 00:00:00 2001
From: Colin Xu <colin.xu@intel.com>
Date: Fri, 16 Oct 2020 13:29:13 +0800
Subject: [PATCH 4/4] drm/i915/gvt: Fix mmio handler break on BXT/APL.

- Remove dup mmio handler for BXT/APL. Otherwise mmio handler will fail
  to init.
- Add engine GPR with F_CMD_ACCESS since BXT/APL will load them via
  LRI. Otherwise, guest will enter failsafe mode.

V2:
Use RCS/BCS GPR macros instead of offset.
Revise commit message.

V3:
Use GEN8_RING_CS_GPR macros on ring base.

Reviewed-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Colin Xu <colin.xu@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20201016052913.209248-1-colin.xu@intel.com
---
 drivers/gpu/drm/i915/gvt/handlers.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 6a026539c873e..eb342a7599438 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -3168,7 +3168,7 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
 		 NULL, NULL);
 
 	MMIO_DFH(GAMT_CHKN_BIT_REG, D_KBL | D_CFL, F_CMD_ACCESS, NULL, NULL);
-	MMIO_D(GEN9_CTX_PREEMPT_REG, D_SKL_PLUS);
+	MMIO_D(GEN9_CTX_PREEMPT_REG, D_SKL_PLUS & ~D_BXT);
 
 	return 0;
 }
@@ -3342,6 +3342,16 @@ static int init_bxt_mmio_info(struct intel_gvt *gvt)
 	MMIO_D(GEN8_PUSHBUS_SHIFT, D_BXT);
 	MMIO_D(GEN6_GFXPAUSE, D_BXT);
 	MMIO_DFH(GEN8_L3SQCREG1, D_BXT, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(GEN8_L3CNTLREG, D_BXT, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0x20D8), D_BXT, F_CMD_ACCESS, NULL, NULL);
+	MMIO_F(GEN8_RING_CS_GPR(RENDER_RING_BASE, 0), 0x40, F_CMD_ACCESS,
+	       0, 0, D_BXT, NULL, NULL);
+	MMIO_F(GEN8_RING_CS_GPR(GEN6_BSD_RING_BASE, 0), 0x40, F_CMD_ACCESS,
+	       0, 0, D_BXT, NULL, NULL);
+	MMIO_F(GEN8_RING_CS_GPR(BLT_RING_BASE, 0), 0x40, F_CMD_ACCESS,
+	       0, 0, D_BXT, NULL, NULL);
+	MMIO_F(GEN8_RING_CS_GPR(VEBOX_RING_BASE, 0), 0x40, F_CMD_ACCESS,
+	       0, 0, D_BXT, NULL, NULL);
 
 	MMIO_DFH(GEN9_CTX_PREEMPT_REG, D_BXT, F_CMD_ACCESS, NULL, NULL);