drm/i915/gvt: Optimize ring switch 2x faster again by lightweight mmio access wrapper

The I915_READ/WRITE macros are not just an mmio read/write; they also
do debug checking and a forcewake domain lookup. That is too heavy for
the GVT ring-switch path, which accesses a batch of mmio registers on
every ring switch. We can handle forcewake manually and use the raw
I915_READ_FW/I915_WRITE_FW accessors instead. The benefit is 2x faster
mmio switch performance.
         Before       After
cycles  ~550000      ~250000

v2: Use the existing I915_READ_FW/I915_WRITE_FW macros. (zhenyu)

Signed-off-by: Changbin Du <changbin.du@intel.com>
Reviewed-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
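
For illustration, a minimal sketch of the access pattern the patch switches to, assuming the i915 driver context. The helpers copy_mmio_batch()/do_mmio_switch() and the reg/value arrays are hypothetical, not from the patch; I915_READ_FW/I915_WRITE_FW, intel_uncore_forcewake_get()/intel_uncore_forcewake_put() and FORCEWAKE_ALL are the interfaces the patch itself uses.

/* Sketch only -- not code from this commit. */
#include "i915_drv.h"   /* I915_READ_FW/I915_WRITE_FW, forcewake helpers */

/*
 * Hypothetical helper: save the current value of a batch of registers and
 * load new ones using the raw accessors.  I915_READ_FW/I915_WRITE_FW skip
 * the per-access debug checks and forcewake-domain lookup that
 * I915_READ/I915_WRITE perform, so the caller must already hold forcewake.
 */
static void copy_mmio_batch(struct drm_i915_private *dev_priv,
                            const i915_reg_t *regs, u32 *saved,
                            const u32 *load, int count)
{
        int i;

        for (i = 0; i < count; i++) {
                saved[i] = I915_READ_FW(regs[i]);       /* raw mmio read */
                I915_WRITE_FW(regs[i], load[i]);        /* raw mmio write */
        }

        /* Posting read: make sure the batched writes have taken effect. */
        I915_READ_FW(regs[count - 1]);
}

static void do_mmio_switch(struct drm_i915_private *dev_priv,
                           const i915_reg_t *regs, u32 *saved,
                           const u32 *load, int count)
{
        /* One forcewake transaction for the whole batch, not per access. */
        intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
        copy_mmio_batch(dev_priv, regs, saved, load, count);
        intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
}

Holding forcewake across the whole loop makes each access a plain register read/write, which is where the cycle reduction shown in the table above comes from.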
Changbin Du authored and Zhenyu Wang committed Aug 10, 2017
1 parent f846c8d commit 4671ea2
 drivers/gpu/drm/i915/gvt/render.c | 39 ++++++++++++++++++++++++++-------------
 1 file changed, 26 insertions(+), 13 deletions(-)
--- a/drivers/gpu/drm/i915/gvt/render.c
+++ b/drivers/gpu/drm/i915/gvt/render.c
@@ -207,16 +207,16 @@ static void load_mocs(struct intel_vgpu *vgpu, int ring_id)
 
         offset.reg = regs[ring_id];
         for (i = 0; i < 64; i++) {
-                gen9_render_mocs[ring_id][i] = I915_READ(offset);
+                gen9_render_mocs[ring_id][i] = I915_READ_FW(offset);
                 I915_WRITE(offset, vgpu_vreg(vgpu, offset));
                 offset.reg += 4;
         }
 
         if (ring_id == RCS) {
                 l3_offset.reg = 0xb020;
                 for (i = 0; i < 32; i++) {
-                        gen9_render_mocs_L3[i] = I915_READ(l3_offset);
-                        I915_WRITE(l3_offset, vgpu_vreg(vgpu, l3_offset));
+                        gen9_render_mocs_L3[i] = I915_READ_FW(l3_offset);
+                        I915_WRITE_FW(l3_offset, vgpu_vreg(vgpu, l3_offset));
                         l3_offset.reg += 4;
                 }
         }
@@ -240,16 +240,16 @@ static void restore_mocs(struct intel_vgpu *vgpu, int ring_id)
 
         offset.reg = regs[ring_id];
         for (i = 0; i < 64; i++) {
-                vgpu_vreg(vgpu, offset) = I915_READ(offset);
-                I915_WRITE(offset, gen9_render_mocs[ring_id][i]);
+                vgpu_vreg(vgpu, offset) = I915_READ_FW(offset);
+                I915_WRITE_FW(offset, gen9_render_mocs[ring_id][i]);
                 offset.reg += 4;
         }
 
         if (ring_id == RCS) {
                 l3_offset.reg = 0xb020;
                 for (i = 0; i < 32; i++) {
-                        vgpu_vreg(vgpu, l3_offset) = I915_READ(l3_offset);
-                        I915_WRITE(l3_offset, gen9_render_mocs_L3[i]);
+                        vgpu_vreg(vgpu, l3_offset) = I915_READ_FW(l3_offset);
+                        I915_WRITE_FW(l3_offset, gen9_render_mocs_L3[i]);
                         l3_offset.reg += 4;
                 }
         }
@@ -284,7 +284,7 @@ static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id)
                 if (mmio->ring_id != ring_id)
                         continue;
 
-                mmio->value = I915_READ(mmio->reg);
+                mmio->value = I915_READ_FW(mmio->reg);
 
                 /*
                  * if it is an inhibit context, load in_context mmio
@@ -301,7 +301,7 @@ static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id)
                 else
                         v = vgpu_vreg(vgpu, mmio->reg);
 
-                I915_WRITE(mmio->reg, v);
+                I915_WRITE_FW(mmio->reg, v);
                 last_reg = mmio->reg;
 
                 trace_render_mmio(vgpu->id, "load",
@@ -311,7 +311,7 @@ static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id)
 
         /* Make sure the swiched MMIOs has taken effect. */
         if (likely(INTEL_GVT_MMIO_OFFSET(last_reg)))
-                POSTING_READ(last_reg);
+                I915_READ_FW(last_reg);
 
         handle_tlb_pending_event(vgpu, ring_id);
 }
@@ -338,7 +338,7 @@ static void switch_mmio_to_host(struct intel_vgpu *vgpu, int ring_id)
                 if (mmio->ring_id != ring_id)
                         continue;
 
-                vgpu_vreg(vgpu, mmio->reg) = I915_READ(mmio->reg);
+                vgpu_vreg(vgpu, mmio->reg) = I915_READ_FW(mmio->reg);
 
                 if (mmio->mask) {
                         vgpu_vreg(vgpu, mmio->reg) &= ~(mmio->mask << 16);
@@ -349,7 +349,7 @@ static void switch_mmio_to_host(struct intel_vgpu *vgpu, int ring_id)
                 if (mmio->in_context)
                         continue;
 
-                I915_WRITE(mmio->reg, v);
+                I915_WRITE_FW(mmio->reg, v);
                 last_reg = mmio->reg;
 
                 trace_render_mmio(vgpu->id, "restore",
@@ -359,7 +359,7 @@ static void switch_mmio_to_host(struct intel_vgpu *vgpu, int ring_id)
 
         /* Make sure the swiched MMIOs has taken effect. */
         if (likely(INTEL_GVT_MMIO_OFFSET(last_reg)))
-                POSTING_READ(last_reg);
+                I915_READ_FW(last_reg);
 }
 
 /**
@@ -374,12 +374,23 @@ static void switch_mmio_to_host(struct intel_vgpu *vgpu, int ring_id)
 void intel_gvt_switch_mmio(struct intel_vgpu *pre,
                            struct intel_vgpu *next, int ring_id)
 {
+        struct drm_i915_private *dev_priv;
+
         if (WARN_ON(!pre && !next))
                 return;
 
         gvt_dbg_render("switch ring %d from %s to %s\n", ring_id,
                        pre ? "vGPU" : "host", next ? "vGPU" : "HOST");
 
+        dev_priv = pre ? pre->gvt->dev_priv : next->gvt->dev_priv;
+
+        /**
+         * We are using raw mmio access wrapper to improve the
+         * performace for batch mmio read/write, so we need
+         * handle forcewake mannually.
+         */
+        intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+
         /**
          * TODO: Optimize for vGPU to vGPU switch by merging
          * switch_mmio_to_host() and switch_mmio_to_vgpu().
@@ -389,4 +400,6 @@ void intel_gvt_switch_mmio(struct intel_vgpu *pre,
 
         if (next)
                 switch_mmio_to_vgpu(next, ring_id);
+
+        intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 }
