diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index 9b8169761ec5b..c8d67d62ca3f6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -42,6 +42,7 @@ struct kfd_smi_client {
 	struct rcu_head rcu;
 	pid_t pid;
 	bool suser;
+	u32 drop_count;
 };
 
 #define KFD_MAX_KFIFO_SIZE	8192
@@ -103,12 +104,28 @@ static ssize_t kfd_smi_ev_read(struct file *filep, char __user *user,
 	}
 	to_copy = min(size, to_copy);
 	ret = kfifo_out(&client->fifo, buf, to_copy);
-	spin_unlock(&client->lock);
 	if (ret <= 0) {
+		spin_unlock(&client->lock);
 		ret = -EAGAIN;
 		goto ret_err;
 	}
 
+	if (client->drop_count) {
+		char msg[KFD_SMI_EVENT_MSG_SIZE];
+		int len;
+
+		len = snprintf(msg, sizeof(msg), "%x ", KFD_SMI_EVENT_DROPPED_EVENT);
+		len += snprintf(msg + len, sizeof(msg) - len,
+				KFD_EVENT_FMT_DROPPED_EVENT(ktime_get_boottime_ns(),
+				client->pid, client->drop_count));
+		if (kfifo_avail(&client->fifo) >= len) {
+			kfifo_in(&client->fifo, msg, len);
+			client->drop_count = 0;
+		}
+	}
+
+	spin_unlock(&client->lock);
+
 	ret = copy_to_user(user, buf, to_copy);
 	if (ret) {
 		ret = -EFAULT;
@@ -182,13 +199,15 @@ static void add_event_to_kfifo(pid_t pid, struct kfd_node *dev,
 	list_for_each_entry_rcu(client, &dev->smi_clients, list) {
 		if (!kfd_smi_ev_enabled(pid, client, smi_event))
 			continue;
+
 		spin_lock(&client->lock);
-		if (kfifo_avail(&client->fifo) >= len) {
+		if (!client->drop_count && kfifo_avail(&client->fifo) >= len) {
 			kfifo_in(&client->fifo, event_msg, len);
 			wake_up_all(&client->wait_queue);
 		} else {
-			pr_debug("smi_event(EventID: %u): no space left\n",
-				smi_event);
+			client->drop_count++;
+			pr_debug("smi_event(EventID: %u): no space left drop_count %d\n",
+				smi_event, client->drop_count);
 		}
 		spin_unlock(&client->lock);
 	}
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index fa9f9846b88e4..7afd66d453133 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -530,6 +530,7 @@ enum kfd_smi_event {
 	KFD_SMI_EVENT_QUEUE_EVICTION = 9,
 	KFD_SMI_EVENT_QUEUE_RESTORE = 10,
 	KFD_SMI_EVENT_UNMAP_FROM_GPU = 11,
+	KFD_SMI_EVENT_DROPPED_EVENT = 12,
 
 	/*
 	 * max event number, as a flag bit to get events from all processes,
@@ -610,6 +611,7 @@ struct kfd_ioctl_smi_events_args {
  *    rw: 'W' for write page fault, 'R' for read page fault
  *    rescheduled: 'R' if the queue restore failed and rescheduled to try again
  *    error_code: migrate failure error code, 0 if no error
+ *    drop_count: how many events dropped when fifo is full
  */
 #define KFD_EVENT_FMT_UPDATE_GPU_RESET(reset_seq_num, reset_cause)\
 		"%x %s\n", (reset_seq_num), (reset_cause)
@@ -645,6 +647,10 @@ struct kfd_ioctl_smi_events_args {
 		"%lld -%d @%lx(%lx) %x %d\n", (ns), (pid), (addr), (size),\
 		(node), (unmap_trigger)
 
+#define KFD_EVENT_FMT_DROPPED_EVENT(ns, pid, drop_count)\
+		"%lld -%d %d\n", (ns), (pid), (drop_count)
+
+
 /**************************************************************************************************
  * CRIU IOCTLs (Checkpoint Restore In Userspace)
 *
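
The dropped-event record travels in the same text stream as every other SMI event, so a consumer only has to recognize the new event id and its "%lld -%d %d\n" payload (timestamp, pid, drop_count). Below is a minimal userspace sketch, not part of the patch, assuming the client has already opened the SMI events fd via AMDKFD_IOC_SMI_EVENTS and enabled the event in its mask; handle_smi_line() is a hypothetical helper name.

/*
 * Hypothetical consumer-side helper (not from the patch): parse one
 * newline-terminated SMI record and report how many events the kernel
 * dropped while this client's kfifo was full.
 */
#include <stdio.h>

#include <linux/kfd_ioctl.h>	/* KFD_SMI_EVENT_DROPPED_EVENT, after this patch */

static void handle_smi_line(const char *line)
{
	unsigned int event;
	long long timestamp_ns;
	int pid, drop_count;

	/* Every SMI record starts with the event id printed in hex. */
	if (sscanf(line, "%x", &event) != 1)
		return;

	if (event == KFD_SMI_EVENT_DROPPED_EVENT &&
	    sscanf(line, "%x %lld -%d %d",
		   &event, &timestamp_ns, &pid, &drop_count) == 4)
		fprintf(stderr, "kfd smi: %d event(s) dropped for pid %d\n",
			drop_count, pid);
}

Because add_event_to_kfifo() stops queueing further events once drop_count is non-zero, the count reported here covers every event lost between the first overflow and the read that freed enough fifo space for the dropped-event record itself.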