Skip to content

Commit

Permalink
accel/ivpu: Add NPU profiling support
Browse files Browse the repository at this point in the history
Implement the time-based Metric Streamer profiling UAPI.

This is a generic mechanism allowing user mode tools to sample
NPU metrics. These metrics are defined by the FW and transparent to
the driver.

User space can detect this feature by querying the
DRM_IVPU_CAP_METRIC_STREAMER driver capability.

Signed-off-by: Tomasz Rusinowicz <tomasz.rusinowicz@intel.com>
Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240513120431.3187212-9-jacek.lawrynowicz@linux.intel.com
  • Loading branch information
Tomasz Rusinowicz authored and Jacek Lawrynowicz committed May 15, 2024
1 parent 68ca7b0 commit cdfad4d
Show file tree
Hide file tree
Showing 9 changed files with 540 additions and 4 deletions.
3 changes: 2 additions & 1 deletion drivers/accel/ivpu/Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
# Copyright (C) 2023 Intel Corporation
# Copyright (C) 2023-2024 Intel Corporation

intel_vpu-y := \
ivpu_drv.o \
Expand All @@ -13,6 +13,7 @@ intel_vpu-y := \
ivpu_jsm_msg.o \
ivpu_mmu.o \
ivpu_mmu_context.o \
ivpu_ms.o \
ivpu_pm.o

intel_vpu-$(CONFIG_DEBUG_FS) += ivpu_debugfs.o
Expand Down
14 changes: 13 additions & 1 deletion drivers/accel/ivpu/ivpu_drv.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "ivpu_jsm_msg.h"
#include "ivpu_mmu.h"
#include "ivpu_mmu_context.h"
#include "ivpu_ms.h"
#include "ivpu_pm.h"

#ifndef DRIVER_VERSION_STR
Expand Down Expand Up @@ -100,6 +101,7 @@ static void file_priv_release(struct kref *ref)
mutex_unlock(&vdev->context_list_lock);
pm_runtime_put_autosuspend(vdev->drm.dev);

mutex_destroy(&file_priv->ms_lock);
mutex_destroy(&file_priv->lock);
kfree(file_priv);
}
Expand All @@ -122,7 +124,7 @@ static int ivpu_get_capabilities(struct ivpu_device *vdev, struct drm_ivpu_param
{
switch (args->index) {
case DRM_IVPU_CAP_METRIC_STREAMER:
args->value = 0;
args->value = 1;
break;
case DRM_IVPU_CAP_DMA_MEMORY_RANGE:
args->value = 1;
Expand Down Expand Up @@ -231,10 +233,13 @@ static int ivpu_open(struct drm_device *dev, struct drm_file *file)
goto err_dev_exit;
}

INIT_LIST_HEAD(&file_priv->ms_instance_list);

file_priv->vdev = vdev;
file_priv->bound = true;
kref_init(&file_priv->ref);
mutex_init(&file_priv->lock);
mutex_init(&file_priv->ms_lock);

mutex_lock(&vdev->context_list_lock);

Expand Down Expand Up @@ -263,6 +268,7 @@ static int ivpu_open(struct drm_device *dev, struct drm_file *file)
xa_erase_irq(&vdev->context_xa, ctx_id);
err_unlock:
mutex_unlock(&vdev->context_list_lock);
mutex_destroy(&file_priv->ms_lock);
mutex_destroy(&file_priv->lock);
kfree(file_priv);
err_dev_exit:
Expand All @@ -278,6 +284,7 @@ static void ivpu_postclose(struct drm_device *dev, struct drm_file *file)
ivpu_dbg(vdev, FILE, "file_priv close: ctx %u process %s pid %d\n",
file_priv->ctx.id, current->comm, task_pid_nr(current));

ivpu_ms_cleanup(file_priv);
ivpu_file_priv_put(&file_priv);
}

Expand All @@ -288,6 +295,10 @@ static const struct drm_ioctl_desc ivpu_drm_ioctls[] = {
DRM_IOCTL_DEF_DRV(IVPU_BO_INFO, ivpu_bo_info_ioctl, 0),
DRM_IOCTL_DEF_DRV(IVPU_SUBMIT, ivpu_submit_ioctl, 0),
DRM_IOCTL_DEF_DRV(IVPU_BO_WAIT, ivpu_bo_wait_ioctl, 0),
DRM_IOCTL_DEF_DRV(IVPU_METRIC_STREAMER_START, ivpu_ms_start_ioctl, 0),
DRM_IOCTL_DEF_DRV(IVPU_METRIC_STREAMER_GET_DATA, ivpu_ms_get_data_ioctl, 0),
DRM_IOCTL_DEF_DRV(IVPU_METRIC_STREAMER_STOP, ivpu_ms_stop_ioctl, 0),
DRM_IOCTL_DEF_DRV(IVPU_METRIC_STREAMER_GET_INFO, ivpu_ms_get_info_ioctl, 0),
};

static int ivpu_wait_for_ready(struct ivpu_device *vdev)
Expand Down Expand Up @@ -638,6 +649,7 @@ static void ivpu_dev_fini(struct ivpu_device *vdev)
ivpu_prepare_for_reset(vdev);
ivpu_shutdown(vdev);

ivpu_ms_cleanup_all(vdev);
ivpu_jobs_abort_all(vdev);
ivpu_job_done_consumer_fini(vdev);
ivpu_pm_cancel_recovery(vdev);
Expand Down
3 changes: 3 additions & 0 deletions drivers/accel/ivpu/ivpu_drv.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,9 @@ struct ivpu_file_priv {
struct mutex lock; /* Protects cmdq */
struct ivpu_cmdq *cmdq[IVPU_NUM_CMDQS_PER_CTX];
struct ivpu_mmu_context ctx;
struct mutex ms_lock; /* Protects ms_instance_list, ms_info_bo */
struct list_head ms_instance_list;
struct ivpu_bo *ms_info_bo;
bool has_mmu_faults;
bool bound;
};
Expand Down
98 changes: 98 additions & 0 deletions drivers/accel/ivpu/ivpu_jsm_msg.c
Original file line number Diff line number Diff line change
Expand Up @@ -440,3 +440,101 @@ int ivpu_jsm_hws_setup_priority_bands(struct ivpu_device *vdev)

return ret;
}

/*
 * Send a VPU_JSM_MSG_METRIC_STREAMER_START request on the async command IPC
 * channel and wait for VPU_JSM_MSG_METRIC_STREAMER_START_DONE.
 *
 * metric_group_mask, sampling_rate, buffer_addr and buffer_size are copied
 * unmodified into the metric_streamer_start payload of the request.
 *
 * Returns the result of ivpu_ipc_send_receive(); a non-zero result is also
 * reported through a rate-limited warning.
 */
int ivpu_jsm_metric_streamer_start(struct ivpu_device *vdev, u64 metric_group_mask,
				   u64 sampling_rate, u64 buffer_addr, u64 buffer_size)
{
	struct vpu_jsm_msg reply;
	struct vpu_jsm_msg msg = { .type = VPU_JSM_MSG_METRIC_STREAMER_START };
	int err;

	msg.payload.metric_streamer_start.buffer_size = buffer_size;
	msg.payload.metric_streamer_start.buffer_addr = buffer_addr;
	msg.payload.metric_streamer_start.sampling_rate = sampling_rate;
	msg.payload.metric_streamer_start.metric_group_mask = metric_group_mask;

	err = ivpu_ipc_send_receive(vdev, &msg, VPU_JSM_MSG_METRIC_STREAMER_START_DONE, &reply,
				    VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
	/* Success and failure both return err; only the failure is logged. */
	if (err)
		ivpu_warn_ratelimited(vdev, "Failed to start metric streamer: ret %d\n", err);

	return err;
}

/*
 * Send a VPU_JSM_MSG_METRIC_STREAMER_STOP request carrying metric_group_mask
 * on the async command IPC channel and wait for
 * VPU_JSM_MSG_METRIC_STREAMER_STOP_DONE.
 *
 * Returns the result of ivpu_ipc_send_receive(); a non-zero result is also
 * reported through a rate-limited warning.
 */
int ivpu_jsm_metric_streamer_stop(struct ivpu_device *vdev, u64 metric_group_mask)
{
	struct vpu_jsm_msg reply;
	struct vpu_jsm_msg msg = { .type = VPU_JSM_MSG_METRIC_STREAMER_STOP };
	int err;

	msg.payload.metric_streamer_stop.metric_group_mask = metric_group_mask;

	err = ivpu_ipc_send_receive(vdev, &msg, VPU_JSM_MSG_METRIC_STREAMER_STOP_DONE, &reply,
				    VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
	if (!err)
		return 0;

	ivpu_warn_ratelimited(vdev, "Failed to stop metric streamer: ret %d\n", err);
	return err;
}

/*
 * Send a VPU_JSM_MSG_METRIC_STREAMER_UPDATE request on the async command IPC
 * channel and wait for VPU_JSM_MSG_METRIC_STREAMER_UPDATE_DONE.
 *
 * On success the bytes_written value from the response is stored in
 * *bytes_written. The pointer is dereferenced unconditionally, so it must not
 * be NULL.
 *
 * Returns 0 on success, the error from ivpu_ipc_send_receive() if the exchange
 * fails, or -EOVERFLOW when buffer_size is non-zero and the response reports
 * more bytes than buffer_size (in which case *bytes_written is left untouched).
 */
int ivpu_jsm_metric_streamer_update(struct ivpu_device *vdev, u64 metric_group_mask,
				    u64 buffer_addr, u64 buffer_size, u64 *bytes_written)
{
	struct vpu_jsm_msg reply;
	struct vpu_jsm_msg msg = { .type = VPU_JSM_MSG_METRIC_STREAMER_UPDATE };
	u64 written;
	int err;

	msg.payload.metric_streamer_update.buffer_size = buffer_size;
	msg.payload.metric_streamer_update.buffer_addr = buffer_addr;
	msg.payload.metric_streamer_update.metric_group_mask = metric_group_mask;

	err = ivpu_ipc_send_receive(vdev, &msg, VPU_JSM_MSG_METRIC_STREAMER_UPDATE_DONE, &reply,
				    VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
	if (err) {
		ivpu_warn_ratelimited(vdev, "Failed to update metric streamer: ret %d\n", err);
		return err;
	}

	written = reply.payload.metric_streamer_done.bytes_written;

	/* The size check is skipped entirely when the caller passed buffer_size == 0. */
	if (buffer_size != 0 && written > buffer_size) {
		ivpu_warn_ratelimited(vdev, "MS buffer overflow: bytes_written %#llx > buffer_size %#llx\n",
				      written, buffer_size);
		return -EOVERFLOW;
	}

	*bytes_written = written;

	return 0;
}

/*
 * Send a VPU_JSM_MSG_METRIC_STREAMER_INFO request on the async command IPC
 * channel and wait for VPU_JSM_MSG_METRIC_STREAMER_INFO_DONE.
 *
 * sample_size and info_size are optional outputs: each is written only when
 * the corresponding pointer is non-NULL. *sample_size receives the response's
 * sample_size field and *info_size receives its bytes_written field.
 *
 * Returns 0 on success, the error from ivpu_ipc_send_receive() if the exchange
 * fails, or -EBADMSG when the response carries a zero sample_size (no output
 * is written in that case).
 */
int ivpu_jsm_metric_streamer_info(struct ivpu_device *vdev, u64 metric_group_mask, u64 buffer_addr,
				  u64 buffer_size, u32 *sample_size, u64 *info_size)
{
	struct vpu_jsm_msg reply;
	struct vpu_jsm_msg msg = { .type = VPU_JSM_MSG_METRIC_STREAMER_INFO };
	int err;

	/*
	 * NOTE(review): the INFO request is populated through the
	 * metric_streamer_start payload member, with sampling_rate left at its
	 * zero-initialized value. Presumably the firmware interface reuses the
	 * START layout for INFO - confirm against the JSM API header.
	 */
	msg.payload.metric_streamer_start.buffer_size = buffer_size;
	msg.payload.metric_streamer_start.buffer_addr = buffer_addr;
	msg.payload.metric_streamer_start.metric_group_mask = metric_group_mask;

	err = ivpu_ipc_send_receive(vdev, &msg, VPU_JSM_MSG_METRIC_STREAMER_INFO_DONE, &reply,
				    VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
	if (err) {
		ivpu_warn_ratelimited(vdev, "Failed to get metric streamer info: ret %d\n", err);
		return err;
	}

	if (reply.payload.metric_streamer_done.sample_size == 0) {
		ivpu_warn_ratelimited(vdev, "Invalid sample size\n");
		return -EBADMSG;
	}

	if (info_size)
		*info_size = reply.payload.metric_streamer_done.bytes_written;
	if (sample_size)
		*sample_size = reply.payload.metric_streamer_done.sample_size;

	return 0;
}
8 changes: 7 additions & 1 deletion drivers/accel/ivpu/ivpu_jsm_msg.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,11 @@ int ivpu_jsm_hws_set_context_sched_properties(struct ivpu_device *vdev, u32 ctx_
int ivpu_jsm_hws_set_scheduling_log(struct ivpu_device *vdev, u32 engine_idx, u32 host_ssid,
u64 vpu_log_buffer_va);
int ivpu_jsm_hws_setup_priority_bands(struct ivpu_device *vdev);

/*
 * Metric streamer JSM messages. Each helper sends one request on the async
 * command IPC channel, waits for the matching *_DONE response and returns 0 on
 * success or a non-zero error code on failure.
 */

/* Send METRIC_STREAMER_START with the given group mask, sampling rate and buffer. */
int ivpu_jsm_metric_streamer_start(struct ivpu_device *vdev, u64 metric_group_mask,
u64 sampling_rate, u64 buffer_addr, u64 buffer_size);
/* Send METRIC_STREAMER_STOP for the given group mask. */
int ivpu_jsm_metric_streamer_stop(struct ivpu_device *vdev, u64 metric_group_mask);
/*
 * Send METRIC_STREAMER_UPDATE; *bytes_written receives the byte count reported
 * by the response. Fails with -EOVERFLOW if that count exceeds a non-zero
 * buffer_size.
 */
int ivpu_jsm_metric_streamer_update(struct ivpu_device *vdev, u64 metric_group_mask,
u64 buffer_addr, u64 buffer_size, u64 *bytes_written);
/*
 * Send METRIC_STREAMER_INFO; sample_size and info_size are optional outputs
 * (either may be NULL). Fails with -EBADMSG if the response reports a zero
 * sample size.
 */
int ivpu_jsm_metric_streamer_info(struct ivpu_device *vdev, u64 metric_group_mask, u64 buffer_addr,
u64 buffer_size, u32 *sample_size, u64 *info_size);
#endif
Loading

0 comments on commit cdfad4d

Please sign in to comment.