Skip to content

Commit

Permalink
drm/v3d: Expose the total GPU usage stats on sysfs
Browse files Browse the repository at this point in the history
The previous patch exposed the accumulated amount of active time per
client for each V3D queue. But this doesn't provide a global notion of
the GPU usage.

Therefore, provide the accumulated amount of active time for each V3D
queue (BIN, RENDER, CSD, TFU and CACHE_CLEAN), considering all the jobs
submitted to the queue, independent of the client.

This data is exposed through the sysfs interface, so that if the
interface is queried at two different points in time, the usage percentage
of each queue can be calculated from the difference.

Co-developed-by: Jose Maria Casanova Crespo <jmcasanova@igalia.com>
Signed-off-by: Jose Maria Casanova Crespo <jmcasanova@igalia.com>
Signed-off-by: Maíra Canal <mcanal@igalia.com>
Acked-by: Jose Maria Casanova Crespo <jmcasanova@igalia.com>
Reviewed-by: Melissa Wen <mwen@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230905213416.1290219-3-mcanal@igalia.com
  • Loading branch information
Maíra Canal committed Nov 6, 2023
1 parent 09a93cc commit 509433d
Show file tree
Hide file tree
Showing 7 changed files with 135 additions and 3 deletions.
3 changes: 2 additions & 1 deletion drivers/gpu/drm/v3d/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ v3d-y := \
v3d_mmu.o \
v3d_perfmon.o \
v3d_trace_points.o \
v3d_sched.o
v3d_sched.o \
v3d_sysfs.o

v3d-$(CONFIG_DEBUG_FS) += v3d_debugfs.o

Expand Down
9 changes: 9 additions & 0 deletions drivers/gpu/drm/v3d/v3d_drv.c
Original file line number Diff line number Diff line change
Expand Up @@ -316,8 +316,14 @@ static int v3d_platform_drm_probe(struct platform_device *pdev)
if (ret)
goto irq_disable;

ret = v3d_sysfs_init(dev);
if (ret)
goto drm_unregister;

return 0;

drm_unregister:
drm_dev_unregister(drm);
irq_disable:
v3d_irq_disable(v3d);
gem_destroy:
Expand All @@ -331,6 +337,9 @@ static void v3d_platform_drm_remove(struct platform_device *pdev)
{
struct drm_device *drm = platform_get_drvdata(pdev);
struct v3d_dev *v3d = to_v3d_dev(drm);
struct device *dev = &pdev->dev;

v3d_sysfs_destroy(dev);

drm_dev_unregister(drm);

Expand Down
8 changes: 8 additions & 0 deletions drivers/gpu/drm/v3d/v3d_drv.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ struct v3d_queue_state {

u64 fence_context;
u64 emit_seqno;

u64 start_ns;
u64 enabled_ns;
u64 jobs_sent;
};

/* Performance monitor object. The perform lifetime is controlled by userspace
Expand Down Expand Up @@ -441,3 +445,7 @@ int v3d_perfmon_destroy_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
int v3d_perfmon_get_values_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);

/* v3d_sysfs.c */
int v3d_sysfs_init(struct device *dev);
void v3d_sysfs_destroy(struct device *dev);
6 changes: 5 additions & 1 deletion drivers/gpu/drm/v3d/v3d_gem.c
Original file line number Diff line number Diff line change
Expand Up @@ -1014,8 +1014,12 @@ v3d_gem_init(struct drm_device *dev)
u32 pt_size = 4096 * 1024;
int ret, i;

for (i = 0; i < V3D_MAX_QUEUES; i++)
for (i = 0; i < V3D_MAX_QUEUES; i++) {
v3d->queue[i].fence_context = dma_fence_context_alloc(1);
v3d->queue[i].start_ns = 0;
v3d->queue[i].enabled_ns = 0;
v3d->queue[i].jobs_sent = 0;
}

spin_lock_init(&v3d->mm_lock);
spin_lock_init(&v3d->job_lock);
Expand Down
28 changes: 28 additions & 0 deletions drivers/gpu/drm/v3d/v3d_irq.c
Original file line number Diff line number Diff line change
Expand Up @@ -103,10 +103,17 @@ v3d_irq(int irq, void *arg)
struct v3d_fence *fence =
to_v3d_fence(v3d->bin_job->base.irq_fence);
struct v3d_file_priv *file = v3d->bin_job->base.file->driver_priv;
u64 runtime = local_clock() - file->start_ns[V3D_BIN];

file->enabled_ns[V3D_BIN] += local_clock() - file->start_ns[V3D_BIN];
file->jobs_sent[V3D_BIN]++;
v3d->queue[V3D_BIN].jobs_sent++;

file->start_ns[V3D_BIN] = 0;
v3d->queue[V3D_BIN].start_ns = 0;

file->enabled_ns[V3D_BIN] += runtime;
v3d->queue[V3D_BIN].enabled_ns += runtime;

trace_v3d_bcl_irq(&v3d->drm, fence->seqno);
dma_fence_signal(&fence->base);
Expand All @@ -117,10 +124,17 @@ v3d_irq(int irq, void *arg)
struct v3d_fence *fence =
to_v3d_fence(v3d->render_job->base.irq_fence);
struct v3d_file_priv *file = v3d->render_job->base.file->driver_priv;
u64 runtime = local_clock() - file->start_ns[V3D_RENDER];

file->enabled_ns[V3D_RENDER] += local_clock() - file->start_ns[V3D_RENDER];
file->jobs_sent[V3D_RENDER]++;
v3d->queue[V3D_RENDER].jobs_sent++;

file->start_ns[V3D_RENDER] = 0;
v3d->queue[V3D_RENDER].start_ns = 0;

file->enabled_ns[V3D_RENDER] += runtime;
v3d->queue[V3D_RENDER].enabled_ns += runtime;

trace_v3d_rcl_irq(&v3d->drm, fence->seqno);
dma_fence_signal(&fence->base);
Expand All @@ -131,10 +145,17 @@ v3d_irq(int irq, void *arg)
struct v3d_fence *fence =
to_v3d_fence(v3d->csd_job->base.irq_fence);
struct v3d_file_priv *file = v3d->csd_job->base.file->driver_priv;
u64 runtime = local_clock() - file->start_ns[V3D_CSD];

file->enabled_ns[V3D_CSD] += local_clock() - file->start_ns[V3D_CSD];
file->jobs_sent[V3D_CSD]++;
v3d->queue[V3D_CSD].jobs_sent++;

file->start_ns[V3D_CSD] = 0;
v3d->queue[V3D_CSD].start_ns = 0;

file->enabled_ns[V3D_CSD] += runtime;
v3d->queue[V3D_CSD].enabled_ns += runtime;

trace_v3d_csd_irq(&v3d->drm, fence->seqno);
dma_fence_signal(&fence->base);
Expand Down Expand Up @@ -172,10 +193,17 @@ v3d_hub_irq(int irq, void *arg)
struct v3d_fence *fence =
to_v3d_fence(v3d->tfu_job->base.irq_fence);
struct v3d_file_priv *file = v3d->tfu_job->base.file->driver_priv;
u64 runtime = local_clock() - file->start_ns[V3D_TFU];

file->enabled_ns[V3D_TFU] += local_clock() - file->start_ns[V3D_TFU];
file->jobs_sent[V3D_TFU]++;
v3d->queue[V3D_TFU].jobs_sent++;

file->start_ns[V3D_TFU] = 0;
v3d->queue[V3D_TFU].start_ns = 0;

file->enabled_ns[V3D_TFU] += runtime;
v3d->queue[V3D_TFU].enabled_ns += runtime;

trace_v3d_tfu_irq(&v3d->drm, fence->seqno);
dma_fence_signal(&fence->base);
Expand Down
15 changes: 14 additions & 1 deletion drivers/gpu/drm/v3d/v3d_sched.c
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
job->start, job->end);

file->start_ns[V3D_BIN] = local_clock();
v3d->queue[V3D_BIN].start_ns = file->start_ns[V3D_BIN];

v3d_switch_perfmon(v3d, &job->base);

Expand Down Expand Up @@ -164,6 +165,7 @@ static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job)
job->start, job->end);

file->start_ns[V3D_RENDER] = local_clock();
v3d->queue[V3D_RENDER].start_ns = file->start_ns[V3D_RENDER];

v3d_switch_perfmon(v3d, &job->base);

Expand Down Expand Up @@ -199,6 +201,7 @@ v3d_tfu_job_run(struct drm_sched_job *sched_job)
trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno);

file->start_ns[V3D_TFU] = local_clock();
v3d->queue[V3D_TFU].start_ns = file->start_ns[V3D_TFU];

V3D_WRITE(V3D_TFU_IIA(v3d->ver), job->args.iia);
V3D_WRITE(V3D_TFU_IIS(v3d->ver), job->args.iis);
Expand Down Expand Up @@ -245,6 +248,7 @@ v3d_csd_job_run(struct drm_sched_job *sched_job)
trace_v3d_submit_csd(dev, to_v3d_fence(fence)->seqno);

file->start_ns[V3D_CSD] = local_clock();
v3d->queue[V3D_CSD].start_ns = file->start_ns[V3D_CSD];

v3d_switch_perfmon(v3d, &job->base);

Expand All @@ -264,14 +268,23 @@ v3d_cache_clean_job_run(struct drm_sched_job *sched_job)
struct v3d_job *job = to_v3d_job(sched_job);
struct v3d_dev *v3d = job->v3d;
struct v3d_file_priv *file = job->file->driver_priv;
u64 runtime;

file->start_ns[V3D_CACHE_CLEAN] = local_clock();
v3d->queue[V3D_CACHE_CLEAN].start_ns = file->start_ns[V3D_CACHE_CLEAN];

v3d_clean_caches(v3d);

file->enabled_ns[V3D_CACHE_CLEAN] += local_clock() - file->start_ns[V3D_CACHE_CLEAN];
runtime = local_clock() - file->start_ns[V3D_CACHE_CLEAN];

file->enabled_ns[V3D_CACHE_CLEAN] += runtime;
v3d->queue[V3D_CACHE_CLEAN].enabled_ns += runtime;

file->jobs_sent[V3D_CACHE_CLEAN]++;
v3d->queue[V3D_CACHE_CLEAN].jobs_sent++;

file->start_ns[V3D_CACHE_CLEAN] = 0;
v3d->queue[V3D_CACHE_CLEAN].start_ns = 0;

return NULL;
}
Expand Down
69 changes: 69 additions & 0 deletions drivers/gpu/drm/v3d/v3d_sysfs.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// SPDX-License-Identifier: MIT
/*
* Copyright © 2023 Igalia S.L.
*/

#include <linux/sched/clock.h>
#include <linux/sysfs.h>

#include "v3d_drv.h"

/*
 * gpu_stats_show() - read handler for the "gpu_stats" sysfs attribute.
 * @dev: device whose drvdata is the drm_device of this V3D instance
 * @attr: attribute being read (unused)
 * @buf: sysfs output buffer
 *
 * Emits a header line followed by one line per V3D queue. Each line displays
 * the queue name, the timestamp sampled once at the start of the read, the
 * number of jobs accounted to that queue and the accumulated runtime plus the
 * runtime of a job still in flight, as can be seen here:
 *
 *	queue		timestamp	jobs	runtime
 *	bin		239043069420	22620	17438164056
 *	render		239043069420	22619	27284814161
 *	tfu		239043069420	8763	394592566
 *	csd		239043069420	3168	10787905530
 *	cache_clean	239043069420	6127	237375940
 *
 * Return: the number of bytes written to @buf.
 */
static ssize_t
gpu_stats_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct drm_device *drm = dev_get_drvdata(dev);
	struct v3d_dev *v3d = to_v3d_dev(drm);
	enum v3d_queue queue;
	u64 timestamp = local_clock();
	ssize_t len;

	len = sysfs_emit(buf, "queue\ttimestamp\tjobs\truntime\n");

	for (queue = 0; queue < V3D_MAX_QUEUES; queue++) {
		/*
		 * start_ns is written by the job-run and IRQ paths while this
		 * handler runs. Take a single snapshot so the "is a job
		 * running" check and the subtraction use the same value;
		 * otherwise a job finishing in between would make the
		 * in-flight runtime equal to the whole timestamp.
		 */
		u64 start_ns = v3d->queue[queue].start_ns;
		u64 active_runtime = 0;

		/*
		 * A job that started after the timestamp was sampled has no
		 * runtime at that instant; skipping it also avoids wrapping
		 * the unsigned subtraction.
		 */
		if (start_ns && start_ns <= timestamp)
			active_runtime = timestamp - start_ns;

		len += sysfs_emit_at(buf, len, "%s\t%llu\t%llu\t%llu\n",
				     v3d_queue_to_string(queue),
				     timestamp,
				     v3d->queue[queue].jobs_sent,
				     v3d->queue[queue].enabled_ns + active_runtime);
	}

	return len;
}
static DEVICE_ATTR_RO(gpu_stats);

static struct attribute *v3d_sysfs_entries[] = {
&dev_attr_gpu_stats.attr,
NULL,
};

static struct attribute_group v3d_sysfs_attr_group = {
.attrs = v3d_sysfs_entries,
};

/*
 * v3d_sysfs_init() - register the V3D sysfs attribute group.
 * @dev: device on whose kobject the group is created
 *
 * Return: the result of sysfs_create_group(), passed through unchanged.
 */
int
v3d_sysfs_init(struct device *dev)
{
	int ret;

	ret = sysfs_create_group(&dev->kobj, &v3d_sysfs_attr_group);

	return ret;
}

/*
 * v3d_sysfs_destroy() - remove the V3D sysfs attribute group.
 * @dev: device on whose kobject the group was created
 */
void
v3d_sysfs_destroy(struct device *dev)
{
	/* No "return <expr>;" here: the function returns void. */
	sysfs_remove_group(&dev->kobj, &v3d_sysfs_attr_group);
}

0 comments on commit 509433d

Please sign in to comment.