drm/v3d: Add support for submitting jobs to the TFU.
The TFU can copy from raster, UIF, and SAND input images to UIF output
images, with optional mipmap generation.  This will certainly be
useful for media EGL image input, but is also useful immediately for
mipmap generation without bogging the V3D core down.

For now we only run the queue 1 job deep, and don't have any hang
recovery (though I don't think we should need it, with TFU).  Queuing
multiple jobs in the HW will require synchronizing the YUV coefficient
regs updates since they don't get FIFOed with the job.

v2: Change the ioctl to IOW instead of IOWR, always set COEF0, explain
    why TFU is AUTH, clarify the syncing docs, drop the unused TFU
    interrupt regs (you're expected to use the hub's), don't take
    &bo->base for NULL bos.
v3: Fix a little whitespace alignment (noticed by checkpatch), rebase
    on drm_sched_job_cleanup() changes.

Signed-off-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Dave Emett <david.emett@broadcom.com> (v2)
Link: https://patchwork.freedesktop.org/patch/264607/
Eric Anholt committed Nov 30, 2018
1 parent 1d8224e commit 1584f16
Showing 8 changed files with 427 additions and 51 deletions.
15 changes: 11 additions & 4 deletions drivers/gpu/drm/v3d/v3d_drv.c
@@ -112,10 +112,15 @@ static int v3d_get_param_ioctl(struct drm_device *dev, void *data,
return 0;
}

/* Any params that aren't just register reads would go here. */

DRM_DEBUG("Unknown parameter %d\n", args->param);
return -EINVAL;
switch (args->param) {
case DRM_V3D_PARAM_SUPPORTS_TFU:
args->value = 1;
return 0;
default:
DRM_DEBUG("Unknown parameter %d\n", args->param);
return -EINVAL;
}
}

static int
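Userspace can probe for TFU support with the param added above before attempting a submit. A minimal sketch, assuming libdrm's drmIoctl() wrapper; the v3d_has_tfu() helper is illustrative, not part of the patch:

#include <stdbool.h>
#include <string.h>
#include <xf86drm.h>
#include "drm/v3d_drm.h"

/* Illustrative helper: kernels predating this patch reject the unknown
 * param with -EINVAL, which we read here as "no TFU support".
 */
static bool v3d_has_tfu(int fd)
{
	struct drm_v3d_get_param p;

	memset(&p, 0, sizeof(p));
	p.param = DRM_V3D_PARAM_SUPPORTS_TFU;

	if (drmIoctl(fd, DRM_IOCTL_V3D_GET_PARAM, &p) != 0)
		return false;

	return p.value != 0;
}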
@@ -170,7 +175,8 @@ static const struct file_operations v3d_drm_fops = {
/* DRM_AUTH is required on SUBMIT_CL for now, while we don't have GMP
* protection between clients. Note that render nodes would be
* able to submit CLs that could access BOs from clients authenticated
* with the master node.
* with the master node. The TFU doesn't use the GMP, so it would
* need to stay DRM_AUTH until we do buffer size/offset validation.
*/
static const struct drm_ioctl_desc v3d_drm_ioctls[] = {
DRM_IOCTL_DEF_DRV(V3D_SUBMIT_CL, v3d_submit_cl_ioctl, DRM_RENDER_ALLOW | DRM_AUTH),
@@ -179,6 +185,7 @@ static const struct drm_ioctl_desc v3d_drm_ioctls[] = {
DRM_IOCTL_DEF_DRV(V3D_MMAP_BO, v3d_mmap_bo_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(V3D_GET_PARAM, v3d_get_param_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(V3D_GET_BO_OFFSET, v3d_get_bo_offset_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(V3D_SUBMIT_TFU, v3d_submit_tfu_ioctl, DRM_RENDER_ALLOW | DRM_AUTH),
};

static const struct vm_operations_struct v3d_vm_ops = {
32 changes: 27 additions & 5 deletions drivers/gpu/drm/v3d/v3d_drv.h
@@ -7,19 +7,18 @@
#include <drm/drm_encoder.h>
#include <drm/drm_gem.h>
#include <drm/gpu_scheduler.h>
#include "uapi/drm/v3d_drm.h"

#define GMP_GRANULARITY (128 * 1024)

/* Enum for each of the V3D queues. We maintain various queue
* tracking as an array because at some point we'll want to support
* the TFU (texture formatting unit) as another queue.
*/
/* Enum for each of the V3D queues. */
enum v3d_queue {
V3D_BIN,
V3D_RENDER,
V3D_TFU,
};

#define V3D_MAX_QUEUES (V3D_RENDER + 1)
#define V3D_MAX_QUEUES (V3D_TFU + 1)

struct v3d_queue_state {
struct drm_gpu_scheduler sched;
@@ -68,6 +67,7 @@ struct v3d_dev {

struct v3d_exec_info *bin_job;
struct v3d_exec_info *render_job;
struct v3d_tfu_job *tfu_job;

struct v3d_queue_state queue[V3D_MAX_QUEUES];

@@ -218,6 +218,25 @@ struct v3d_exec_info {
u32 qma, qms, qts;
};

struct v3d_tfu_job {
struct drm_sched_job base;

struct drm_v3d_submit_tfu args;

/* An optional fence userspace can pass in for the job to depend on. */
struct dma_fence *in_fence;

/* v3d fence to be signaled by IRQ handler when the job is complete. */
struct dma_fence *done_fence;

struct v3d_dev *v3d;

struct kref refcount;

/* This is the array of BOs that were looked up at the start of exec. */
struct v3d_bo *bo[4];
};
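
The in_fence above is consumed as a scheduler dependency rather than waited on in the ioctl. The v3d_sched.c half of this patch is not expanded on this page; the following is a sketch of the shape a drm_sched dependency hook takes with this struct, with the function name and body assumed for illustration rather than quoted from the patch:

static struct dma_fence *
v3d_tfu_job_dependency(struct drm_sched_job *sched_job,
		       struct drm_sched_entity *s_entity)
{
	/* Sketch, not the verbatim patch: hand the user-supplied
	 * in_fence to the scheduler exactly once, so the job is not
	 * run until that fence signals.
	 */
	struct v3d_tfu_job *job = container_of(sched_job,
					       struct v3d_tfu_job, base);
	struct dma_fence *fence = job->in_fence;

	/* Clear it so v3d_tfu_job_cleanup() does not put it a second
	 * time; the scheduler puts the fence we return.
	 */
	job->in_fence = NULL;

	return fence;
}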

/**
* _wait_for - magic (register) wait macro
*
@@ -281,9 +300,12 @@ int v3d_gem_init(struct drm_device *dev);
void v3d_gem_destroy(struct drm_device *dev);
int v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
int v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
int v3d_wait_bo_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
void v3d_exec_put(struct v3d_exec_info *exec);
void v3d_tfu_job_put(struct v3d_tfu_job *exec);
void v3d_reset(struct v3d_dev *v3d);
void v3d_invalidate_caches(struct v3d_dev *v3d);
void v3d_flush_caches(struct v3d_dev *v3d);
178 changes: 158 additions & 20 deletions drivers/gpu/drm/v3d/v3d_gem.c
@@ -207,26 +207,27 @@ v3d_flush_caches(struct v3d_dev *v3d)
}

static void
v3d_attach_object_fences(struct v3d_exec_info *exec)
v3d_attach_object_fences(struct v3d_bo **bos, int bo_count,
struct dma_fence *fence)
{
struct dma_fence *out_fence = exec->render_done_fence;
int i;

for (i = 0; i < exec->bo_count; i++) {
for (i = 0; i < bo_count; i++) {
/* XXX: Use shared fences for read-only objects. */
reservation_object_add_excl_fence(exec->bo[i]->resv, out_fence);
reservation_object_add_excl_fence(bos[i]->resv, fence);
}
}

static void
v3d_unlock_bo_reservations(struct drm_device *dev,
struct v3d_exec_info *exec,
struct v3d_bo **bos,
int bo_count,
struct ww_acquire_ctx *acquire_ctx)
{
int i;

for (i = 0; i < exec->bo_count; i++)
ww_mutex_unlock(&exec->bo[i]->resv->lock);
for (i = 0; i < bo_count; i++)
ww_mutex_unlock(&bos[i]->resv->lock);

ww_acquire_fini(acquire_ctx);
}
@@ -240,7 +241,8 @@ v3d_unlock_bo_reservations(struct drm_device *dev,
*/
static int
v3d_lock_bo_reservations(struct drm_device *dev,
struct v3d_exec_info *exec,
struct v3d_bo **bos,
int bo_count,
struct ww_acquire_ctx *acquire_ctx)
{
int contended_lock = -1;
@@ -250,7 +252,7 @@ v3d_lock_bo_reservations(struct drm_device *dev,

retry:
if (contended_lock != -1) {
struct v3d_bo *bo = exec->bo[contended_lock];
struct v3d_bo *bo = bos[contended_lock];

ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock,
acquire_ctx);
@@ -260,20 +262,20 @@ v3d_lock_bo_reservations(struct drm_device *dev,
}
}

for (i = 0; i < exec->bo_count; i++) {
for (i = 0; i < bo_count; i++) {
if (i == contended_lock)
continue;

ret = ww_mutex_lock_interruptible(&exec->bo[i]->resv->lock,
ret = ww_mutex_lock_interruptible(&bos[i]->resv->lock,
acquire_ctx);
if (ret) {
int j;

for (j = 0; j < i; j++)
ww_mutex_unlock(&exec->bo[j]->resv->lock);
ww_mutex_unlock(&bos[j]->resv->lock);

if (contended_lock != -1 && contended_lock >= i) {
struct v3d_bo *bo = exec->bo[contended_lock];
struct v3d_bo *bo = bos[contended_lock];

ww_mutex_unlock(&bo->resv->lock);
}
@@ -293,10 +295,11 @@ v3d_lock_bo_reservations(struct drm_device *dev,
/* Reserve space for our shared (read-only) fence references,
* before we commit the CL to the hardware.
*/
for (i = 0; i < exec->bo_count; i++) {
ret = reservation_object_reserve_shared(exec->bo[i]->resv, 1);
for (i = 0; i < bo_count; i++) {
ret = reservation_object_reserve_shared(bos[i]->resv, 1);
if (ret) {
v3d_unlock_bo_reservations(dev, exec, acquire_ctx);
v3d_unlock_bo_reservations(dev, bos, bo_count,
acquire_ctx);
return ret;
}
}
@@ -419,6 +422,33 @@ void v3d_exec_put(struct v3d_exec_info *exec)
kref_put(&exec->refcount, v3d_exec_cleanup);
}

static void
v3d_tfu_job_cleanup(struct kref *ref)
{
struct v3d_tfu_job *job = container_of(ref, struct v3d_tfu_job,
refcount);
struct v3d_dev *v3d = job->v3d;
unsigned int i;

dma_fence_put(job->in_fence);
dma_fence_put(job->done_fence);

for (i = 0; i < ARRAY_SIZE(job->bo); i++) {
if (job->bo[i])
drm_gem_object_put_unlocked(&job->bo[i]->base);
}

pm_runtime_mark_last_busy(v3d->dev);
pm_runtime_put_autosuspend(v3d->dev);

kfree(job);
}

void v3d_tfu_job_put(struct v3d_tfu_job *job)
{
kref_put(&job->refcount, v3d_tfu_job_cleanup);
}

int
v3d_wait_bo_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv)
@@ -536,7 +566,8 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
if (ret)
goto fail;

ret = v3d_lock_bo_reservations(dev, exec, &acquire_ctx);
ret = v3d_lock_bo_reservations(dev, exec->bo, exec->bo_count,
&acquire_ctx);
if (ret)
goto fail;

@@ -570,9 +601,10 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
&v3d_priv->sched_entity[V3D_RENDER]);
mutex_unlock(&v3d->sched_lock);

v3d_attach_object_fences(exec);
v3d_attach_object_fences(exec->bo, exec->bo_count,
exec->render_done_fence);

v3d_unlock_bo_reservations(dev, exec, &acquire_ctx);
v3d_unlock_bo_reservations(dev, exec->bo, exec->bo_count, &acquire_ctx);

/* Update the return sync object for the job */
sync_out = drm_syncobj_find(file_priv, args->out_sync);
@@ -588,13 +620,119 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,

fail_unreserve:
mutex_unlock(&v3d->sched_lock);
v3d_unlock_bo_reservations(dev, exec, &acquire_ctx);
v3d_unlock_bo_reservations(dev, exec->bo, exec->bo_count, &acquire_ctx);
fail:
v3d_exec_put(exec);

return ret;
}

/**
* v3d_submit_tfu_ioctl() - Submits a TFU (texture formatting) job to the V3D.
* @dev: DRM device
* @data: ioctl argument
* @file_priv: DRM file for this fd
*
* Userspace provides the register setup for the TFU, which we don't
* need to validate since the TFU is behind the MMU.
*/
int
v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv)
{
struct v3d_dev *v3d = to_v3d_dev(dev);
struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
struct drm_v3d_submit_tfu *args = data;
struct v3d_tfu_job *job;
struct ww_acquire_ctx acquire_ctx;
struct drm_syncobj *sync_out;
struct dma_fence *sched_done_fence;
int ret = 0;
int bo_count;

job = kcalloc(1, sizeof(*job), GFP_KERNEL);
if (!job)
return -ENOMEM;

ret = pm_runtime_get_sync(v3d->dev);
if (ret < 0) {
kfree(job);
return ret;
}

kref_init(&job->refcount);

ret = drm_syncobj_find_fence(file_priv, args->in_sync,
0, 0, &job->in_fence);
if (ret == -EINVAL)
goto fail;

job->args = *args;
job->v3d = v3d;

spin_lock(&file_priv->table_lock);
for (bo_count = 0; bo_count < ARRAY_SIZE(job->bo); bo_count++) {
struct drm_gem_object *bo;

if (!args->bo_handles[bo_count])
break;

bo = idr_find(&file_priv->object_idr,
args->bo_handles[bo_count]);
if (!bo) {
DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
bo_count, args->bo_handles[bo_count]);
ret = -ENOENT;
spin_unlock(&file_priv->table_lock);
goto fail;
}
drm_gem_object_get(bo);
job->bo[bo_count] = to_v3d_bo(bo);
}
spin_unlock(&file_priv->table_lock);

ret = v3d_lock_bo_reservations(dev, job->bo, bo_count, &acquire_ctx);
if (ret)
goto fail;

mutex_lock(&v3d->sched_lock);
ret = drm_sched_job_init(&job->base,
&v3d_priv->sched_entity[V3D_TFU],
v3d_priv);
if (ret)
goto fail_unreserve;

sched_done_fence = dma_fence_get(&job->base.s_fence->finished);

kref_get(&job->refcount); /* put by scheduler job completion */
drm_sched_entity_push_job(&job->base, &v3d_priv->sched_entity[V3D_TFU]);
mutex_unlock(&v3d->sched_lock);

v3d_attach_object_fences(job->bo, bo_count, sched_done_fence);

v3d_unlock_bo_reservations(dev, job->bo, bo_count, &acquire_ctx);

/* Update the return sync object */
sync_out = drm_syncobj_find(file_priv, args->out_sync);
if (sync_out) {
drm_syncobj_replace_fence(sync_out, 0, sched_done_fence);
drm_syncobj_put(sync_out);
}
dma_fence_put(sched_done_fence);

v3d_tfu_job_put(job);

return 0;

fail_unreserve:
mutex_unlock(&v3d->sched_lock);
v3d_unlock_bo_reservations(dev, job->bo, bo_count, &acquire_ctx);
fail:
v3d_tfu_job_put(job);

return ret;
}

int
v3d_gem_init(struct drm_device *dev)
{
(Diffs for the remaining five changed files were not loaded on this page.)
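
For reference, the userspace half of a submission through the new ioctl is small. A hedged sketch follows, assuming the struct drm_v3d_submit_tfu layout from this series' uapi header (not shown in the loaded hunks) and libdrm's drmIoctl(); the helper name and handle parameters are illustrative:

#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include "drm/v3d_drm.h"

/* Illustrative sketch, not part of the patch.  The bo_handles array is
 * 0-terminated (the kernel's lookup loop above stops at the first zero
 * handle); the TFU register-setup fields in the struct are passed
 * through unvalidated, since the TFU sits behind the MMU.
 */
static int v3d_submit_tfu(int fd, uint32_t dst_bo, uint32_t src_bo,
			  uint32_t in_sync, uint32_t out_sync)
{
	struct drm_v3d_submit_tfu tfu;

	memset(&tfu, 0, sizeof(tfu));
	tfu.bo_handles[0] = dst_bo;	/* output BO first, inputs after */
	tfu.bo_handles[1] = src_bo;
	tfu.in_sync = in_sync;		/* 0 = no dependency */
	tfu.out_sync = out_sync;	/* signaled when the job completes */
	/* tfu.icfg and the other register fields would be filled in from
	 * the input/output image layouts (assumed names; see the uapi
	 * header for the real fields).
	 */

	return drmIoctl(fd, DRM_IOCTL_V3D_SUBMIT_TFU, &tfu);
}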
