Skip to content

Commit

Permalink
drm/i915/gvt: Separate cmd scan from request allocation
Browse files Browse the repository at this point in the history
Currently i915 request structure and shadow ring buffer are allocated
before command scan, so it will have to restore to previous states once
any error happens afterwards in the long dispatch_workload path.

This patch is to introduce a reserved ring buffer created at the beginning
of vGPU initialization. Workload will be coped to this reserved buffer and
be scanned first, the i915 request and shadow ring buffer are only
allocated after the result of scan is successful.

To balance the memory usage and buffer alloc time, the coming bigger ring
buffer will be reallocated and kept until more bigger buffer is coming.

v2:
- use kmalloc for the smaller ring buffer, realloc if required. (Zhenyu)

v3:
- remove the dynamically allocated ring buffer. (Zhenyu)

v4:
- code style polish.
- kfree previous allocated buffer once kmalloc failed. (Zhenyu)

Signed-off-by: fred gao <fred.gao@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
  • Loading branch information
fred gao authored and Zhenyu Wang committed Sep 8, 2017
1 parent f090a00 commit 0a53bc0
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 38 deletions.
31 changes: 20 additions & 11 deletions drivers/gpu/drm/i915/gvt/cmd_parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -2603,7 +2603,8 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload)
{
struct intel_vgpu *vgpu = workload->vgpu;
unsigned long gma_head, gma_tail, gma_top, guest_rb_size;
u32 *cs;
void *shadow_ring_buffer_va;
int ring_id = workload->ring_id;
int ret;

guest_rb_size = _RING_CTL_BUF_SIZE(workload->rb_ctl);
Expand All @@ -2616,34 +2617,42 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload)
gma_tail = workload->rb_start + workload->rb_tail;
gma_top = workload->rb_start + guest_rb_size;

/* allocate shadow ring buffer */
cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32));
if (IS_ERR(cs))
return PTR_ERR(cs);
if (workload->rb_len > vgpu->reserve_ring_buffer_size[ring_id]) {
void *va = vgpu->reserve_ring_buffer_va[ring_id];
/* realloc the new ring buffer if needed */
vgpu->reserve_ring_buffer_va[ring_id] =
krealloc(va, workload->rb_len, GFP_KERNEL);
if (!vgpu->reserve_ring_buffer_va[ring_id]) {
gvt_vgpu_err("fail to alloc reserve ring buffer\n");
return -ENOMEM;
}
vgpu->reserve_ring_buffer_size[ring_id] = workload->rb_len;
}

shadow_ring_buffer_va = vgpu->reserve_ring_buffer_va[ring_id];

/* get shadow ring buffer va */
workload->shadow_ring_buffer_va = cs;
workload->shadow_ring_buffer_va = shadow_ring_buffer_va;

/* head > tail --> copy head <-> top */
if (gma_head > gma_tail) {
ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm,
gma_head, gma_top, cs);
gma_head, gma_top, shadow_ring_buffer_va);
if (ret < 0) {
gvt_vgpu_err("fail to copy guest ring buffer\n");
return ret;
}
cs += ret / sizeof(u32);
shadow_ring_buffer_va += ret;
gma_head = workload->rb_start;
}

/* copy head or start <-> tail */
ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, gma_head, gma_tail, cs);
ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, gma_head, gma_tail,
shadow_ring_buffer_va);
if (ret < 0) {
gvt_vgpu_err("fail to copy guest ring buffer\n");
return ret;
}
cs += ret / sizeof(u32);
intel_ring_advance(workload->req, cs);
return 0;
}

Expand Down
30 changes: 30 additions & 0 deletions drivers/gpu/drm/i915/gvt/execlist.c
Original file line number Diff line number Diff line change
Expand Up @@ -820,10 +820,21 @@ static void clean_workloads(struct intel_vgpu *vgpu, unsigned long engine_mask)

void intel_vgpu_clean_execlist(struct intel_vgpu *vgpu)
{
enum intel_engine_id i;
struct intel_engine_cs *engine;

clean_workloads(vgpu, ALL_ENGINES);
kmem_cache_destroy(vgpu->workloads);

for_each_engine(engine, vgpu->gvt->dev_priv, i) {
kfree(vgpu->reserve_ring_buffer_va[i]);
vgpu->reserve_ring_buffer_va[i] = NULL;
vgpu->reserve_ring_buffer_size[i] = 0;
}

}

#define RESERVE_RING_BUFFER_SIZE ((1 * PAGE_SIZE)/8)
int intel_vgpu_init_execlist(struct intel_vgpu *vgpu)
{
enum intel_engine_id i;
Expand All @@ -843,7 +854,26 @@ int intel_vgpu_init_execlist(struct intel_vgpu *vgpu)
if (!vgpu->workloads)
return -ENOMEM;

/* each ring has a shadow ring buffer until vgpu destroyed */
for_each_engine(engine, vgpu->gvt->dev_priv, i) {
vgpu->reserve_ring_buffer_va[i] =
kmalloc(RESERVE_RING_BUFFER_SIZE, GFP_KERNEL);
if (!vgpu->reserve_ring_buffer_va[i]) {
gvt_vgpu_err("fail to alloc reserve ring buffer\n");
goto out;
}
vgpu->reserve_ring_buffer_size[i] = RESERVE_RING_BUFFER_SIZE;
}
return 0;
out:
for_each_engine(engine, vgpu->gvt->dev_priv, i) {
if (vgpu->reserve_ring_buffer_size[i]) {
kfree(vgpu->reserve_ring_buffer_va[i]);
vgpu->reserve_ring_buffer_va[i] = NULL;
vgpu->reserve_ring_buffer_size[i] = 0;
}
}
return -ENOMEM;
}

void intel_vgpu_reset_execlist(struct intel_vgpu *vgpu,
Expand Down
3 changes: 3 additions & 0 deletions drivers/gpu/drm/i915/gvt/gvt.h
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,9 @@ struct intel_vgpu {
struct list_head workload_q_head[I915_NUM_ENGINES];
struct kmem_cache *workloads;
atomic_t running_workload_num;
/* 1/2K for each reserve ring buffer */
void *reserve_ring_buffer_va[I915_NUM_ENGINES];
int reserve_ring_buffer_size[I915_NUM_ENGINES];
DECLARE_BITMAP(tlb_handle_pending, I915_NUM_ENGINES);
struct i915_gem_context *shadow_ctx;
DECLARE_BITMAP(shadow_ctx_desc_updated, I915_NUM_ENGINES);
Expand Down
84 changes: 57 additions & 27 deletions drivers/gpu/drm/i915/gvt/scheduler.c
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,34 @@ static void shadow_context_descriptor_update(struct i915_gem_context *ctx,
ce->lrc_desc = desc;
}

static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload)
{
struct intel_vgpu *vgpu = workload->vgpu;
void *shadow_ring_buffer_va;
u32 *cs;

/* allocate shadow ring buffer */
cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32));
if (IS_ERR(cs)) {
gvt_vgpu_err("fail to alloc size =%ld shadow ring buffer\n",
workload->rb_len);
return PTR_ERR(cs);
}

shadow_ring_buffer_va = workload->shadow_ring_buffer_va;

/* get shadow ring buffer va */
workload->shadow_ring_buffer_va = cs;

memcpy(cs, shadow_ring_buffer_va,
workload->rb_len);

cs += workload->rb_len / sizeof(u32);
intel_ring_advance(workload->req, cs);

return 0;
}

/**
* intel_gvt_scan_and_shadow_workload - audit the workload by scanning and
* shadow it as well, include ringbuffer,wa_ctx and ctx.
Expand All @@ -214,8 +242,10 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
int ring_id = workload->ring_id;
struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx;
struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
struct intel_engine_cs *engine = dev_priv->engine[ring_id];
struct drm_i915_gem_request *rq;
struct intel_vgpu *vgpu = workload->vgpu;
struct intel_ring *ring;
int ret;

lockdep_assert_held(&dev_priv->drm.struct_mutex);
Expand All @@ -231,17 +261,6 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
shadow_context_descriptor_update(shadow_ctx,
dev_priv->engine[ring_id]);

rq = i915_gem_request_alloc(dev_priv->engine[ring_id], shadow_ctx);
if (IS_ERR(rq)) {
gvt_vgpu_err("fail to allocate gem request\n");
ret = PTR_ERR(rq);
goto out;
}

gvt_dbg_sched("ring id %d get i915 gem request %p\n", ring_id, rq);

workload->req = i915_gem_request_get(rq);

ret = intel_gvt_scan_and_shadow_ringbuffer(workload);
if (ret)
goto out;
Expand All @@ -253,10 +272,37 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
goto out;
}

/* pin shadow context by gvt even the shadow context will be pinned
* when i915 alloc request. That is because gvt will update the guest
* context from shadow context when workload is completed, and at that
* moment, i915 may already unpined the shadow context to make the
* shadow_ctx pages invalid. So gvt need to pin itself. After update
* the guest context, gvt can unpin the shadow_ctx safely.
*/
ring = engine->context_pin(engine, shadow_ctx);
if (IS_ERR(ring)) {
ret = PTR_ERR(ring);
gvt_vgpu_err("fail to pin shadow context\n");
goto out;
}

ret = populate_shadow_context(workload);
if (ret)
goto out;

rq = i915_gem_request_alloc(dev_priv->engine[ring_id], shadow_ctx);
if (IS_ERR(rq)) {
gvt_vgpu_err("fail to allocate gem request\n");
ret = PTR_ERR(rq);
goto out;
}

gvt_dbg_sched("ring id %d get i915 gem request %p\n", ring_id, rq);

workload->req = i915_gem_request_get(rq);
ret = copy_workload_to_ring_buffer(workload);
if (ret)
goto out;
workload->shadowed = true;

out:
Expand All @@ -269,8 +315,6 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx;
struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
struct intel_engine_cs *engine = dev_priv->engine[ring_id];
struct intel_vgpu *vgpu = workload->vgpu;
struct intel_ring *ring;
int ret = 0;

gvt_dbg_sched("ring id %d prepare to dispatch workload %p\n",
Expand All @@ -288,20 +332,6 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
goto out;
}

/* pin shadow context by gvt even the shadow context will be pinned
* when i915 alloc request. That is because gvt will update the guest
* context from shadow context when workload is completed, and at that
* moment, i915 may already unpined the shadow context to make the
* shadow_ctx pages invalid. So gvt need to pin itself. After update
* the guest context, gvt can unpin the shadow_ctx safely.
*/
ring = engine->context_pin(engine, shadow_ctx);
if (IS_ERR(ring)) {
ret = PTR_ERR(ring);
gvt_vgpu_err("fail to pin shadow context\n");
goto out;
}

out:
if (ret)
workload->status = ret;
Expand Down

0 comments on commit 0a53bc0

Please sign in to comment.