Skip to content

Commit

Permalink
drm/vmwgfx: Use the cpu blit utility for framebuffer to screen target…
Browse files Browse the repository at this point in the history
… blits

This blit was previously performed using two large vmaps, one of which
was torn down and remapped on each blit. Use the more resource-
conserving TTM cpu blit instead.

The blit is used in bounding-box computing mode, which makes it possible
to minimize the bounding box used in host operations.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
  • Loading branch information
Thomas Hellstrom committed Mar 22, 2018
1 parent 79273e1 commit ef86cfe
Show file tree
Hide file tree
Showing 6 changed files with 97 additions and 91 deletions.
23 changes: 23 additions & 0 deletions drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,22 @@ static const struct ttm_place evictable_placement_flags[] = {
}
};

/*
 * Placement table with three cached entries, in this order: system memory,
 * GMR and MOB. VRAM is not listed. All entries leave fpfn and lpfn at 0.
 */
static const struct ttm_place nonfixed_placement_flags[] = {
	{	/* Cached system memory */
		.flags = TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED,
		.fpfn = 0,
		.lpfn = 0
	},
	{	/* Cached GMR */
		.flags = VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED,
		.fpfn = 0,
		.lpfn = 0
	},
	{	/* Cached MOB */
		.flags = VMW_PL_FLAG_MOB | TTM_PL_FLAG_CACHED,
		.fpfn = 0,
		.lpfn = 0
	}
};

struct ttm_placement vmw_evictable_placement = {
.num_placement = 4,
.placement = evictable_placement_flags,
Expand Down Expand Up @@ -213,6 +229,13 @@ struct ttm_placement vmw_mob_ne_placement = {
.busy_placement = &mob_ne_placement_flags
};

/*
 * Placement set built from the three nonfixed_placement_flags entries;
 * the busy placement is the single sys_placement_flags entry.
 */
struct ttm_placement vmw_nonfixed_placement = {
	.placement = nonfixed_placement_flags,
	.num_placement = 3,
	.busy_placement = &sys_placement_flags,
	.num_busy_placement = 1
};

struct vmw_ttm_tt {
struct ttm_dma_tt dma_ttm;
struct vmw_private *dev_priv;
Expand Down
1 change: 1 addition & 0 deletions drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
Original file line number Diff line number Diff line change
Expand Up @@ -767,6 +767,7 @@ extern struct ttm_placement vmw_evictable_placement;
extern struct ttm_placement vmw_srf_placement;
extern struct ttm_placement vmw_mob_placement;
extern struct ttm_placement vmw_mob_ne_placement;
extern struct ttm_placement vmw_nonfixed_placement;
extern struct ttm_bo_driver vmw_bo_driver;
extern int vmw_dma_quiescent(struct drm_device *dev);
extern int vmw_bo_map_dma(struct ttm_buffer_object *bo);
Expand Down
50 changes: 32 additions & 18 deletions drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
Original file line number Diff line number Diff line change
Expand Up @@ -682,9 +682,6 @@ vmw_du_plane_duplicate_state(struct drm_plane *plane)
return NULL;

vps->pinned = 0;

/* Mapping is managed by prepare_fb/cleanup_fb */
memset(&vps->host_map, 0, sizeof(vps->host_map));
vps->cpp = 0;

/* Each ref counted resource needs to be acquired again */
Expand Down Expand Up @@ -746,11 +743,6 @@ vmw_du_plane_destroy_state(struct drm_plane *plane,


/* Should have been freed by cleanup_fb */
if (vps->host_map.virtual) {
DRM_ERROR("Host mapping not freed\n");
ttm_bo_kunmap(&vps->host_map);
}

if (vps->surf)
vmw_surface_unreference(&vps->surf);

Expand Down Expand Up @@ -1129,12 +1121,14 @@ static const struct drm_framebuffer_funcs vmw_framebuffer_dmabuf_funcs = {
};

/**
* Pin the dmabuffer to the start of vram.
* Pin the dmabuffer in a location suitable for access by the
* display system.
*/
static int vmw_framebuffer_pin(struct vmw_framebuffer *vfb)
{
struct vmw_private *dev_priv = vmw_priv(vfb->base.dev);
struct vmw_dma_buffer *buf;
struct ttm_placement *placement;
int ret;

buf = vfb->dmabuf ? vmw_framebuffer_to_vfbd(&vfb->base)->buffer :
Expand All @@ -1151,12 +1145,24 @@ static int vmw_framebuffer_pin(struct vmw_framebuffer *vfb)
break;
case vmw_du_screen_object:
case vmw_du_screen_target:
if (vfb->dmabuf)
return vmw_dmabuf_pin_in_vram_or_gmr(dev_priv, buf,
false);
if (vfb->dmabuf) {
if (dev_priv->capabilities & SVGA_CAP_3D) {
/*
* Use surface DMA to get content to
* screen target surface.
*/
placement = &vmw_vram_gmr_placement;
} else {
/* Use CPU blit. */
placement = &vmw_sys_placement;
}
} else {
/* Use surface / image update */
placement = &vmw_mob_placement;
}

return vmw_dmabuf_pin_in_placement(dev_priv, buf,
&vmw_mob_placement, false);
return vmw_dmabuf_pin_in_placement(dev_priv, buf, placement,
false);
default:
return -EINVAL;
}
Expand Down Expand Up @@ -2419,14 +2425,21 @@ int vmw_kms_helper_dirty(struct vmw_private *dev_priv,
int vmw_kms_helper_buffer_prepare(struct vmw_private *dev_priv,
struct vmw_dma_buffer *buf,
bool interruptible,
bool validate_as_mob)
bool validate_as_mob,
bool for_cpu_blit)
{
struct ttm_operation_ctx ctx = {
.interruptible = interruptible,
.no_wait_gpu = false};
struct ttm_buffer_object *bo = &buf->base;
int ret;

ttm_bo_reserve(bo, false, false, NULL);
ret = vmw_validate_single_buffer(dev_priv, bo, interruptible,
validate_as_mob);
if (for_cpu_blit)
ret = ttm_bo_validate(bo, &vmw_nonfixed_placement, &ctx);
else
ret = vmw_validate_single_buffer(dev_priv, bo, interruptible,
validate_as_mob);
if (ret)
ttm_bo_unreserve(bo);

Expand Down Expand Up @@ -2538,7 +2551,8 @@ int vmw_kms_helper_resource_prepare(struct vmw_resource *res,
if (res->backup) {
ret = vmw_kms_helper_buffer_prepare(res->dev_priv, res->backup,
interruptible,
res->dev_priv->has_mob);
res->dev_priv->has_mob,
false);
if (ret)
goto out_unreserve;
}
Expand Down
4 changes: 2 additions & 2 deletions drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,6 @@ struct vmw_plane_state {
int pinned;

/* For CPU Blit */
struct ttm_bo_kmap_obj host_map;
unsigned int cpp;
};

Expand Down Expand Up @@ -289,7 +288,8 @@ int vmw_kms_helper_dirty(struct vmw_private *dev_priv,
int vmw_kms_helper_buffer_prepare(struct vmw_private *dev_priv,
struct vmw_dma_buffer *buf,
bool interruptible,
bool validate_as_mob);
bool validate_as_mob,
bool for_cpu_blit);
void vmw_kms_helper_buffer_revert(struct vmw_dma_buffer *buf);
void vmw_kms_helper_buffer_finish(struct vmw_private *dev_priv,
struct drm_file *file_priv,
Expand Down
5 changes: 3 additions & 2 deletions drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
Original file line number Diff line number Diff line change
Expand Up @@ -1032,7 +1032,7 @@ int vmw_kms_sou_do_dmabuf_dirty(struct vmw_private *dev_priv,
int ret;

ret = vmw_kms_helper_buffer_prepare(dev_priv, buf, interruptible,
false);
false, false);
if (ret)
return ret;

Expand Down Expand Up @@ -1130,7 +1130,8 @@ int vmw_kms_sou_readback(struct vmw_private *dev_priv,
struct vmw_kms_dirty dirty;
int ret;

ret = vmw_kms_helper_buffer_prepare(dev_priv, buf, true, false);
ret = vmw_kms_helper_buffer_prepare(dev_priv, buf, true, false,
false);
if (ret)
return ret;

Expand Down
105 changes: 36 additions & 69 deletions drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,6 @@ struct vmw_screen_target_display_unit {
bool defined;

/* For CPU Blit */
struct ttm_bo_kmap_obj host_map;
unsigned int cpp;
};

Expand Down Expand Up @@ -639,10 +638,9 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
container_of(dirty->unit, typeof(*stdu), base);
s32 width, height;
s32 src_pitch, dst_pitch;
u8 *src, *dst;
bool not_used;
struct ttm_bo_kmap_obj guest_map;
int ret;
struct ttm_buffer_object *src_bo, *dst_bo;
u32 src_offset, dst_offset;
struct vmw_diff_cpy diff = VMW_CPU_BLIT_DIFF_INITIALIZER(stdu->cpp);

if (!dirty->num_hits)
return;
Expand All @@ -653,57 +651,38 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
if (width == 0 || height == 0)
return;

ret = ttm_bo_kmap(&ddirty->buf->base, 0, ddirty->buf->base.num_pages,
&guest_map);
if (ret) {
DRM_ERROR("Failed mapping framebuffer for blit: %d\n",
ret);
goto out_cleanup;
}

/* Assume we are blitting from Host (display_srf) to Guest (dmabuf) */
src_pitch = stdu->display_srf->base_size.width * stdu->cpp;
src = ttm_kmap_obj_virtual(&stdu->host_map, &not_used);
src += ddirty->top * src_pitch + ddirty->left * stdu->cpp;

dst_pitch = ddirty->pitch;
dst = ttm_kmap_obj_virtual(&guest_map, &not_used);
dst += ddirty->fb_top * dst_pitch + ddirty->fb_left * stdu->cpp;
/* Assume we are blitting from Guest (dmabuf) to Host (display_srf) */
dst_pitch = stdu->display_srf->base_size.width * stdu->cpp;
dst_bo = &stdu->display_srf->res.backup->base;
dst_offset = ddirty->top * dst_pitch + ddirty->left * stdu->cpp;

src_pitch = ddirty->pitch;
src_bo = &ddirty->buf->base;
src_offset = ddirty->fb_top * src_pitch + ddirty->fb_left * stdu->cpp;

/* Figure out the real direction */
if (ddirty->transfer == SVGA3D_WRITE_HOST_VRAM) {
u8 *tmp;
s32 tmp_pitch;

tmp = src;
tmp_pitch = src_pitch;

src = dst;
src_pitch = dst_pitch;

dst = tmp;
dst_pitch = tmp_pitch;
/* Swap src and dst if the assumption was wrong. */
if (ddirty->transfer != SVGA3D_WRITE_HOST_VRAM) {
swap(dst_pitch, src_pitch);
swap(dst_bo, src_bo);
swap(src_offset, dst_offset);
}

/* CPU Blit */
while (height-- > 0) {
memcpy(dst, src, width * stdu->cpp);
dst += dst_pitch;
src += src_pitch;
}
(void) vmw_bo_cpu_blit(dst_bo, dst_offset, dst_pitch,
src_bo, src_offset, src_pitch,
width * stdu->cpp, height, &diff);

if (ddirty->transfer == SVGA3D_WRITE_HOST_VRAM) {
if (ddirty->transfer == SVGA3D_WRITE_HOST_VRAM &&
drm_rect_visible(&diff.rect)) {
struct vmw_private *dev_priv;
struct vmw_stdu_update *cmd;
struct drm_clip_rect region;
int ret;

/* We are updating the actual surface, not a proxy */
region.x1 = ddirty->left;
region.x2 = ddirty->right;
region.y1 = ddirty->top;
region.y2 = ddirty->bottom;
region.x1 = diff.rect.x1;
region.x2 = diff.rect.x2;
region.y1 = diff.rect.y1;
region.y2 = diff.rect.y2;
ret = vmw_kms_update_proxy(
(struct vmw_resource *) &stdu->display_srf->res,
(const struct drm_clip_rect *) &region, 1, 1);
Expand All @@ -720,13 +699,12 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
}

vmw_stdu_populate_update(cmd, stdu->base.unit,
ddirty->left, ddirty->right,
ddirty->top, ddirty->bottom);
region.x1, region.x2,
region.y1, region.y2);

vmw_fifo_commit(dev_priv, sizeof(*cmd));
}

ttm_bo_kunmap(&guest_map);
out_cleanup:
ddirty->left = ddirty->top = ddirty->fb_left = ddirty->fb_top = S32_MAX;
ddirty->right = ddirty->bottom = S32_MIN;
Expand Down Expand Up @@ -772,9 +750,15 @@ int vmw_kms_stdu_dma(struct vmw_private *dev_priv,
container_of(vfb, struct vmw_framebuffer_dmabuf, base)->buffer;
struct vmw_stdu_dirty ddirty;
int ret;
bool cpu_blit = !(dev_priv->capabilities & SVGA_CAP_3D);

/*
* VMs without 3D support don't have the surface DMA command and
* we'll be using a CPU blit, and the framebuffer should be moved out
* of VRAM.
*/
ret = vmw_kms_helper_buffer_prepare(dev_priv, buf, interruptible,
false);
false, cpu_blit);
if (ret)
return ret;

Expand All @@ -793,8 +777,8 @@ int vmw_kms_stdu_dma(struct vmw_private *dev_priv,
if (to_surface)
ddirty.base.fifo_reserve_size += sizeof(struct vmw_stdu_update);

/* 2D VMs cannot use SVGA_3D_CMD_SURFACE_DMA so do CPU blit instead */
if (!(dev_priv->capabilities & SVGA_CAP_3D)) {

if (cpu_blit) {
ddirty.base.fifo_commit = vmw_stdu_dmabuf_cpu_commit;
ddirty.base.clip = vmw_stdu_dmabuf_cpu_clip;
ddirty.base.fifo_reserve_size = 0;
Expand Down Expand Up @@ -1071,9 +1055,6 @@ vmw_stdu_primary_plane_cleanup_fb(struct drm_plane *plane,
{
struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state);

if (vps->host_map.virtual)
ttm_bo_kunmap(&vps->host_map);

if (vps->surf)
WARN_ON(!vps->pinned);

Expand Down Expand Up @@ -1235,24 +1216,11 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane,
* so cache these mappings
*/
if (vps->content_fb_type == SEPARATE_DMA &&
!(dev_priv->capabilities & SVGA_CAP_3D)) {
ret = ttm_bo_kmap(&vps->surf->res.backup->base, 0,
vps->surf->res.backup->base.num_pages,
&vps->host_map);
if (ret) {
DRM_ERROR("Failed to map display buffer to CPU\n");
goto out_srf_unpin;
}

!(dev_priv->capabilities & SVGA_CAP_3D))
vps->cpp = new_fb->pitches[0] / new_fb->width;
}

return 0;

out_srf_unpin:
vmw_resource_unpin(&vps->surf->res);
vps->pinned--;

out_srf_unref:
vmw_surface_unreference(&vps->surf);
return ret;
Expand Down Expand Up @@ -1296,7 +1264,6 @@ vmw_stdu_primary_plane_atomic_update(struct drm_plane *plane,
stdu->display_srf = vps->surf;
stdu->content_fb_type = vps->content_fb_type;
stdu->cpp = vps->cpp;
memcpy(&stdu->host_map, &vps->host_map, sizeof(vps->host_map));

vclips.x = crtc->x;
vclips.y = crtc->y;
Expand Down

0 comments on commit ef86cfe

Please sign in to comment.