Skip to content

Commit

Permalink
drm/amdkfd: SMI report dropped event count
Browse files Browse the repository at this point in the history
Add new SMI event to report the dropped event count.

When the event kfifo is full, the drop count is already non-zero, or there is
not enough space left to store the event message, increase the drop count.

After reading events out of the kfifo, if any events were dropped (drop_count
is non-zero), generate a dropped-event record and reset the drop count to zero.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: James Zhu <James.Zhu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
  • Loading branch information
Philip Yang authored and Alex Deucher committed Oct 7, 2024
1 parent 012be6f commit a3ab2d4
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 4 deletions.
27 changes: 23 additions & 4 deletions drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ struct kfd_smi_client {
struct rcu_head rcu;
pid_t pid;
bool suser;
u32 drop_count;
};

#define KFD_MAX_KFIFO_SIZE 8192
Expand Down Expand Up @@ -103,12 +104,28 @@ static ssize_t kfd_smi_ev_read(struct file *filep, char __user *user,
}
to_copy = min(size, to_copy);
ret = kfifo_out(&client->fifo, buf, to_copy);
spin_unlock(&client->lock);
if (ret <= 0) {
spin_unlock(&client->lock);
ret = -EAGAIN;
goto ret_err;
}

if (client->drop_count) {
char msg[KFD_SMI_EVENT_MSG_SIZE];
int len;

len = snprintf(msg, sizeof(msg), "%x ", KFD_SMI_EVENT_DROPPED_EVENT);
len += snprintf(msg + len, sizeof(msg) - len,
KFD_EVENT_FMT_DROPPED_EVENT(ktime_get_boottime_ns(),
client->pid, client->drop_count));
if (kfifo_avail(&client->fifo) >= len) {
kfifo_in(&client->fifo, msg, len);
client->drop_count = 0;
}
}

spin_unlock(&client->lock);

ret = copy_to_user(user, buf, to_copy);
if (ret) {
ret = -EFAULT;
Expand Down Expand Up @@ -182,13 +199,15 @@ static void add_event_to_kfifo(pid_t pid, struct kfd_node *dev,
list_for_each_entry_rcu(client, &dev->smi_clients, list) {
if (!kfd_smi_ev_enabled(pid, client, smi_event))
continue;

spin_lock(&client->lock);
if (kfifo_avail(&client->fifo) >= len) {
if (!client->drop_count && kfifo_avail(&client->fifo) >= len) {
kfifo_in(&client->fifo, event_msg, len);
wake_up_all(&client->wait_queue);
} else {
pr_debug("smi_event(EventID: %u): no space left\n",
smi_event);
client->drop_count++;
pr_debug("smi_event(EventID: %u): no space left drop_count %d\n",
smi_event, client->drop_count);
}
spin_unlock(&client->lock);
}
Expand Down
6 changes: 6 additions & 0 deletions include/uapi/linux/kfd_ioctl.h
Original file line number Diff line number Diff line change
Expand Up @@ -530,6 +530,7 @@ enum kfd_smi_event {
KFD_SMI_EVENT_QUEUE_EVICTION = 9,
KFD_SMI_EVENT_QUEUE_RESTORE = 10,
KFD_SMI_EVENT_UNMAP_FROM_GPU = 11,
KFD_SMI_EVENT_DROPPED_EVENT = 12,

/*
* max event number, as a flag bit to get events from all processes,
Expand Down Expand Up @@ -610,6 +611,7 @@ struct kfd_ioctl_smi_events_args {
* rw: 'W' for write page fault, 'R' for read page fault
* rescheduled: 'R' if the queue restore failed and rescheduled to try again
* error_code: migrate failure error code, 0 if no error
* drop_count: how many events dropped when fifo is full
*/
#define KFD_EVENT_FMT_UPDATE_GPU_RESET(reset_seq_num, reset_cause)\
"%x %s\n", (reset_seq_num), (reset_cause)
Expand Down Expand Up @@ -645,6 +647,10 @@ struct kfd_ioctl_smi_events_args {
"%lld -%d @%lx(%lx) %x %d\n", (ns), (pid), (addr), (size),\
(node), (unmap_trigger)

/*
 * Expands to a printf-style format string followed by its arguments for a
 * dropped-event message body: the timestamp (ns), a '-' immediately followed
 * by the pid, and the drop count, separated by spaces and terminated by a
 * newline.
 *
 * NOTE(review): drop_count is formatted with %d; the caller visible in this
 * change passes a u32 counter -- confirm whether %u was intended.
 */
#define KFD_EVENT_FMT_DROPPED_EVENT(ns, pid, drop_count)\
"%lld -%d %d\n", (ns), (pid), (drop_count)


/**************************************************************************************************
* CRIU IOCTLs (Checkpoint Restore In Userspace)
*
Expand Down

0 comments on commit a3ab2d4

Please sign in to comment.