diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 830f8d4ebcc25..dcc965af23542 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1392,7 +1392,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( } /* Free the BO*/ - amdgpu_bo_unref(&mem->bo); + drm_gem_object_put_unlocked(&mem->bo->tbo.base); mutex_destroy(&mem->lock); kfree(mem); @@ -1895,7 +1895,8 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, | KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE | KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE; - (*mem)->bo = amdgpu_bo_ref(bo); + drm_gem_object_get(&bo->tbo.base); + (*mem)->bo = bo; (*mem)->va = va; (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 5b591c9d046bf..d0ddd46b74149 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -395,7 +395,9 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate) bool init_low = hive->pstate == AMDGPU_XGMI_PSTATE_UNKNOWN; /* fw bug so temporarily disable pstate switching */ - if (!hive || adev->asic_type == CHIP_VEGA20) + return 0; + + if (!hive || adev->asic_type != CHIP_VEGA20) return 0; mutex_lock(&hive->hive_lock); diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index a09e4a5d754f1..4baeb12e764bf 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -53,6 +53,7 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \ $(AMDKFD_PATH)/kfd_int_process_v9.o \ $(AMDKFD_PATH)/kfd_dbgdev.o \ $(AMDKFD_PATH)/kfd_dbgmgr.o \ + $(AMDKFD_PATH)/kfd_smi_events.o \ $(AMDKFD_PATH)/kfd_crat.o \ $(AMDKFD_PATH)/kfd_rdma.o \ $(AMDKFD_PATH)/kfd_peerdirect.o \ diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c index 9f59ba93cfe03..24b4717341172 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c @@ -24,6 +24,7 @@ #include "kfd_events.h" #include "cik_int.h" #include "amdgpu_amdkfd.h" +#include "kfd_smi_events.h" static bool cik_event_interrupt_isr(struct kfd_dev *dev, const uint32_t *ih_ring_entry, @@ -107,6 +108,7 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev, ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) { struct kfd_vm_fault_info info; + kfd_smi_event_update_vmfault(dev, pasid); kfd_process_vm_fault(dev->dqm, pasid); memset(&info, 0, sizeof(info)); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 77feaf92ced55..1b60e0ed6b5cd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -47,6 +47,7 @@ #include "kfd_trace.h" #include "amdgpu_amdkfd.h" +#include "kfd_smi_events.h" static long kfd_ioctl(struct file *, unsigned int, unsigned long); static int kfd_open(struct inode *, struct file *); @@ -2681,7 +2682,7 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep, target = need_proc_create ? 
kfd_create_process(thread) : kfd_lookup_process_by_pid(pid); if (!target) { - pr_err("Cannot find process info info for %i\n", + pr_debug("Cannot find process info for %i\n", args->pid); r = -ESRCH; goto out; @@ -2832,8 +2833,13 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep, data3, /* Grace Period */ data1, /* Flags */ queue_id_array); /* array of queue ids */ - if (r) + + if (copy_to_user((void __user *)args->ptr, queue_id_array, + sizeof(uint32_t) * data2)) { + r = -EFAULT; goto unlock_out; + } + break; case KFD_IOC_DBG_TRAP_NODE_RESUME: @@ -2841,8 +2847,13 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep, data2, /* Number of queues */ data1, /* Flags */ queue_id_array); /* array of queue ids */ - if (r) + + if (copy_to_user((void __user *)args->ptr, queue_id_array, + sizeof(uint32_t) * data2)) { + r = -EFAULT; goto unlock_out; + } + break; case KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT: r = kfd_dbg_ev_query_debug_event(pdd, &args->data1, @@ -2896,6 +2907,20 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep, return r; } +/* Handle requests for watching SMI events */ +static int kfd_ioctl_smi_events(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_smi_events_args *args = data; + struct kfd_dev *dev; + + dev = kfd_device_by_id(args->gpuid); + if (!dev) + return -EINVAL; + + return kfd_smi_event_open(dev, &args->anon_fd); +} + #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ .cmd_drv = 0, .name = #ioctl} @@ -2992,6 +3017,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS, kfd_ioctl_alloc_queue_gws, 0), + AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS, + kfd_ioctl_smi_events, 0), + AMDKFD_IOCTL_DEF(AMDKFD_IOC_IPC_IMPORT_HANDLE, kfd_ioctl_ipc_import_handle, 0), @@ -3005,8 +3033,6 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { kfd_ioctl_dbg_set_debug_trap, 0), }; -#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) - static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) { struct kfd_process *process; @@ -3018,10 +3044,8 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) unsigned int usize, asize; int retcode = -EINVAL; - if (nr >= AMDKFD_CORE_IOCTL_COUNT) - goto err_i1; - - if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) { + if (((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) || + ((nr >= AMDKFD_COMMAND_START_2) && (nr < AMDKFD_COMMAND_END_2))) { u32 amdkfd_size; ioctl = &amdkfd_ioctls[nr]; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug_events.c index e8b59f194ec77..c8aa40416cbf1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug_events.c @@ -122,8 +122,6 @@ uint32_t kfd_dbg_get_queue_status_word(struct queue *q, int flags) if (flags & KFD_DBG_EV_FLAG_CLEAR_STATUS) WRITE_ONCE(q->properties.debug_event_type, 0); - q->properties.is_new = false; - return queue_status_word; } @@ -153,17 +151,22 @@ int kfd_dbg_ev_query_debug_event(struct kfd_process_device *pdd, } *event_status = kfd_dbg_get_queue_status_word(q, flags); - + q->properties.is_new = false; goto out; } list_for_each_entry(pqn, &pqm->queues, process_queue_list) { - unsigned int tmp_status = - kfd_dbg_get_queue_status_word(pqn->q, flags); - if (pqn->q && (tmp_status & (KFD_DBG_EV_STATUS_TRAP | - KFD_DBG_EV_STATUS_VMFAULT))) { + unsigned int tmp_status; + + if 
(!pqn->q) + continue; + + tmp_status = kfd_dbg_get_queue_status_word(pqn->q, flags); + if (tmp_status & (KFD_DBG_EV_STATUS_TRAP | + KFD_DBG_EV_STATUS_VMFAULT)) { *queue_id = pqn->q->properties.queue_id; *event_status = tmp_status; + pqn->q->properties.is_new = false; goto out; } } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 0491ab2b4a9b3..2c030c2b5b8d0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -586,6 +586,11 @@ static int kfd_gws_init(struct kfd_dev *kfd) return ret; } +static void kfd_smi_init(struct kfd_dev *dev) { + INIT_LIST_HEAD(&dev->smi_clients); + spin_lock_init(&dev->smi_lock); +} + bool kgd2kfd_device_init(struct kfd_dev *kfd, struct drm_device *ddev, const struct kgd2kfd_shared_resources *gpu_resources) @@ -700,6 +705,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, goto kfd_topology_add_device_error; } + kfd_smi_init(kfd); + kfd->init_complete = true; dev_info(kfd_device, "added device %x:%x\n", kfd->pdev->vendor, kfd->pdev->device); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index f4536f94b184e..7c894dc6b0aec 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "kfd_priv.h" #include "kfd_device_queue_manager.h" #include "kfd_mqd_manager.h" @@ -664,6 +665,9 @@ static int suspend_single_queue(struct device_queue_manager *dqm, struct kfd_process_device *pdd, struct queue *q) { + if (q->properties.is_suspended) + return 0; + pr_debug("Suspending PASID %u queue [%i]\n", pdd->process->pasid, q->properties.queue_id); @@ -703,6 +707,9 @@ static void resume_single_queue(struct device_queue_manager *dqm, struct kfd_process_device *pdd; uint64_t pd_base; + if (!q->properties.is_suspended) + return; + pdd = qpd_to_pdd(qpd); /* Retrieve PD base */ pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm); @@ -2209,20 +2216,27 @@ int release_debug_trap_vmid(struct device_queue_manager *dqm) return r; } -#define INVALID_QUEUE_ID 0xffffffff #define QUEUE_NOT_FOUND -1 -static int queue_idx_in_array(unsigned int queue_id, +/* invalidate queue operation in array */ +static void q_array_invalidate(uint32_t num_queues, uint32_t *queue_ids) +{ + int i; + + for (i = 0; i < num_queues; i++) + queue_ids[i] |= KFD_DBG_QUEUE_INVALID_MASK; +} + +/* find queue index in array */ +static int q_array_get_index(unsigned int queue_id, uint32_t num_queues, uint32_t *queue_ids) { int i; - if (queue_id == INVALID_QUEUE_ID) - return QUEUE_NOT_FOUND; - for (i = 0; i < num_queues; i++) - if (queue_id == queue_ids[i]) + if (queue_id == (queue_ids[i] & ~KFD_DBG_QUEUE_INVALID_MASK)) return i; + return QUEUE_NOT_FOUND; } @@ -2282,50 +2296,65 @@ int resume_queues(struct kfd_process *p, uint32_t flags, uint32_t *queue_ids) { - int r = -ENODEV; struct kfd_process_device *pdd; - struct queue *q; + int total_resumed = 0; + + /* mask all queues as invalid. 
unmask per successful request */ + q_array_invalidate(num_queues, queue_ids); list_for_each_entry(pdd, &p->per_device_data, per_device_list) { - bool queues_resumed_on_device = false; struct device_queue_manager *dqm = pdd->dev->dqm; struct qcm_process_device *qpd = &pdd->qpd; + struct queue *q; + int r, per_device_resumed = 0; dqm_lock(dqm); - /* We need to loop over all of the queues on this - * device, and check if it is in the list passed in, - * and if it is, we will restore it. - */ + /* unmask queues that resume or already resumed as valid */ list_for_each_entry(q, &qpd->queues_list, list) { - if (queue_idx_in_array(q->properties.queue_id, + int q_idx = q_array_get_index(q->properties.queue_id, num_queues, - queue_ids) != QUEUE_NOT_FOUND) { - if (!q->properties.is_suspended) - continue; - resume_single_queue(dqm, - &pdd->qpd, - q); - queues_resumed_on_device = true; + queue_ids); + + if (q_idx != QUEUE_NOT_FOUND) { + resume_single_queue(dqm, &pdd->qpd, q); + queue_ids[q_idx] &= + ~KFD_DBG_QUEUE_INVALID_MASK; + per_device_resumed++; } } - if (queues_resumed_on_device) { - r = execute_queues_cpsch(dqm, + if (!per_device_resumed) { + dqm_unlock(dqm); + continue; + } + + r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD); - if (r) { - pr_err("Failed to resume process queues\n"); - dqm_unlock(dqm); - return r; + if (r) { + pr_err("Failed to resume process queues\n"); + list_for_each_entry(q, &qpd->queues_list, list) { + int q_idx = q_array_get_index( + q->properties.queue_id, + num_queues, + queue_ids); + + /* mask queue as error on resume fail */ + if (q_idx != QUEUE_NOT_FOUND) + queue_ids[q_idx] |= + KFD_DBG_QUEUE_ERROR_MASK; } + } else { wake_up_all(&dqm->destroy_wait); + total_resumed += per_device_resumed; } dqm_unlock(dqm); } - return r; + + return total_resumed; } int suspend_queues(struct kfd_process *p, @@ -2334,35 +2363,32 @@ int suspend_queues(struct kfd_process *p, uint32_t flags, uint32_t *queue_ids) { - int err = 0, r = -ENODEV; struct kfd_process_device *pdd; - struct queue *q; int total_suspended = 0; + /* mask all queues as invalid. unmask on successful request */ + q_array_invalidate(num_queues, queue_ids); + list_for_each_entry(pdd, &p->per_device_data, per_device_list) { struct device_queue_manager *dqm = pdd->dev->dqm; struct qcm_process_device *qpd = &pdd->qpd; - int per_device_suspended = 0; + struct queue *q; + int r, per_device_suspended = 0; dqm_lock(dqm); - /* We need to loop over all of the queues on this - * device, and check if it is in the list passed in, - * and if it is, we will evict it.
- */ + /* unmask queues that suspend or already suspended */ list_for_each_entry(q, &qpd->queues_list, list) { - int q_idx = queue_idx_in_array(q->properties.queue_id, + int q_idx = q_array_get_index(q->properties.queue_id, num_queues, queue_ids); - if (q_idx == QUEUE_NOT_FOUND) - continue; - - if (q->properties.is_suspended || - suspend_single_queue(dqm, pdd, q)) - queue_ids[q_idx] = INVALID_QUEUE_ID; - else + if (q_idx != QUEUE_NOT_FOUND && + !suspend_single_queue(dqm, pdd, q)) { + queue_ids[q_idx] &= + ~KFD_DBG_QUEUE_INVALID_MASK; per_device_suspended++; + } } if (!per_device_suspended) { @@ -2376,41 +2402,27 @@ int suspend_queues(struct kfd_process *p, if (r) pr_err("Failed to suspend process queues.\n"); + else + total_suspended += per_device_suspended; list_for_each_entry(q, &qpd->queues_list, list) { - bool is_q = queue_idx_in_array(q->properties.queue_id, - num_queues, queue_ids) != QUEUE_NOT_FOUND; - /* unmark is_suspend on unexpected failure */ - if (r && is_q) { - resume_single_queue(dqm, qpd, q); - q->properties.queue_id = INVALID_QUEUE_ID; - } else if ((flags & KFD_DBG_EV_FLAG_CLEAR_STATUS) && - !r && is_q) - WRITE_ONCE(q->properties.debug_event_type, 0); - } + int q_idx = q_array_get_index(q->properties.queue_id, + num_queues, queue_ids); - dqm_unlock(dqm); + if (q_idx == QUEUE_NOT_FOUND) + continue; - /* failed to suspend for unexpected reason */ - if (r) { - wake_up_all(&dqm->destroy_wait); - per_device_suspended = 0; - err = r; + /* mask queue as error on suspend fail */ + if (r) + queue_ids[q_idx] |= KFD_DBG_QUEUE_ERROR_MASK; + else if (flags & KFD_DBG_EV_FLAG_CLEAR_STATUS) + WRITE_ONCE(q->properties.debug_event_type, 0); } - total_suspended += per_device_suspended; + dqm_unlock(dqm); amdgpu_amdkfd_debug_mem_fence(dqm->dev->kgd); } - /* rollback suspended queues */ - if (total_suspended < num_queues) { - pr_debug("Failed to suspend requested queues. 
Rolling back.\n"); - r = resume_queues(p, num_queues, flags, queue_ids); - if (r) - return r; - return -EINVAL; - } - if (total_suspended) { struct copy_context_work_handler_workarea copy_context_worker; @@ -2426,7 +2438,8 @@ int suspend_queues(struct kfd_process *p, flush_work(©_context_worker.copy_context_work); destroy_work_on_stack(©_context_worker.copy_context_work); } - return err; + + return total_suspended; } static uint32_t set_queue_type_for_user(struct queue_properties *q_props) @@ -2463,6 +2476,7 @@ void set_queue_snapshot_entry(struct device_queue_manager *dqm, qss_entry->ring_size = (uint32_t)q->properties.queue_size; qss_entry->queue_type = set_queue_type_for_user(&q->properties); qss_entry->queue_status = kfd_dbg_get_queue_status_word(q, flags); + q->properties.is_new = false; dqm_unlock(dqm); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index ee82632cfed4e..800eb2d791a22 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -25,6 +25,7 @@ #include "kfd_debug_events.h" #include "soc15_int.h" #include "kfd_device_queue_manager.h" +#include "kfd_smi_events.h" #define KFD_CONTEXT_ID_DEBUG_TRAP_MASK 0x000080 #define KFD_CONTEXT_ID_DEBUG_DOORBELL_MASK 0x0003ff @@ -130,6 +131,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev, info.prot_write = ring_id & 0x20; kfd_set_dbg_ev_from_interrupt(dev, pasid, -1, true); + kfd_smi_event_update_vmfault(dev, pasid); kfd_process_vm_fault(dev->dqm, pasid); kfd_signal_vm_fault_event(dev, pasid, &info); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_ipc.c b/drivers/gpu/drm/amd/amdkfd/kfd_ipc.c index b733f6d09d43d..ee823de5ae547 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_ipc.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_ipc.c @@ -88,9 +88,11 @@ static void ipc_obj_release(struct kref *r) kfree(obj); } -void ipc_obj_get(struct kfd_ipc_obj *obj) +struct kfd_ipc_obj *ipc_obj_get(struct kfd_ipc_obj *obj) { - kref_get(&obj->ref); + if (kref_get_unless_zero(&obj->ref)) + return obj; + return NULL; } void ipc_obj_put(struct kfd_ipc_obj **obj) @@ -196,7 +198,7 @@ int kfd_ipc_import_handle(struct kfd_dev *dev, struct kfd_process *p, &kfd_ipc_handles.handles[HANDLE_TO_KEY(share_handle)], node) { if (!memcmp(entry->share_handle, share_handle, sizeof(entry->share_handle))) { - found = entry; + found = ipc_obj_get(entry); break; } } @@ -204,7 +206,6 @@ int kfd_ipc_import_handle(struct kfd_dev *dev, struct kfd_process *p, if (!found) return -EINVAL; - ipc_obj_get(found); pr_debug("Found ipc_dma_buf: %p\n", found->data); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_ipc.h b/drivers/gpu/drm/amd/amdkfd/kfd_ipc.h index 9ee8627b88b08..a6560eae9ff50 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_ipc.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_ipc.h @@ -45,7 +45,7 @@ int kfd_ipc_import_dmabuf(struct kfd_dev *kfd, struct kfd_process *p, int kfd_ipc_export_as_handle(struct kfd_dev *dev, struct kfd_process *p, uint64_t handle, uint32_t *ipc_handle); -void ipc_obj_get(struct kfd_ipc_obj *obj); +struct kfd_ipc_obj *ipc_obj_get(struct kfd_ipc_obj *obj); void ipc_obj_put(struct kfd_ipc_obj **obj); #endif /* KFD_IPC_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 060e905cf043c..60243798cce24 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -322,6 +322,10 @@ struct kfd_dev { /* Global GWS resource shared b/t processes*/ void *gws; + + /* Clients 
watching SMI events */ + struct list_head smi_clients; + spinlock_t smi_lock; }; struct kfd_ipc_obj; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 2c30dd3a7e6cb..7a5ff68f682a1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -171,6 +171,7 @@ static int init_user_queue(struct process_queue_manager *pqm, /* Doorbell initialized in user space*/ q_properties->doorbell_ptr = NULL; + q_properties->is_new = true; /* let DQM handle it*/ q_properties->vmid = 0; @@ -316,7 +317,6 @@ int pqm_create_queue(struct process_queue_manager *pqm, if (q) { pr_debug("PQM done creating queue\n"); - q->properties.is_new = true; kfd_procfs_add_queue(q); print_queue_properties(&q->properties); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_rdma.c b/drivers/gpu/drm/amd/amdkfd/kfd_rdma.c index 58a11fab2bc7d..905070180c8cc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_rdma.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_rdma.c @@ -113,6 +113,7 @@ static int get_pages(uint64_t address, uint64_t length, struct pid *pid, rdma_cb_data->amd_p2p_data.pid = pid; rdma_cb_data->amd_p2p_data.priv = buf_obj; rdma_cb_data->amd_p2p_data.pages = sg_table_tmp; + rdma_cb_data->amd_p2p_data.kfd_proc = p; rdma_cb_data->free_callback = free_callback; rdma_cb_data->client_priv = client_priv; @@ -128,7 +129,6 @@ static int get_pages(uint64_t address, uint64_t length, struct pid *pid, kfree(rdma_cb_data); out: mutex_unlock(&p->mutex); - kfd_unref_process(p); return ret; } @@ -186,7 +186,6 @@ void run_rdma_free_callback(struct kfd_bo *buf_obj) */ static int put_pages(struct amd_p2p_info **p_p2p_data) { - struct kfd_process *p = NULL; int ret = 0; if (!(*p_p2p_data)) { @@ -194,18 +193,13 @@ static int put_pages(struct amd_p2p_info **p_p2p_data) return -EINVAL; } - p = kfd_lookup_process_by_pid((*p_p2p_data)->pid); - if (!p) { - pr_err("Could not find the process\n"); - return -EINVAL; - } - ret = put_pages_helper(*p_p2p_data); - if (!ret) + if (!ret) { + kfd_unref_process((*p_p2p_data)->kfd_proc); *p_p2p_data = NULL; - kfd_unref_process(p); + } return ret; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c new file mode 100644 index 0000000000000..f5fd18eacf0d4 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c @@ -0,0 +1,214 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <linux/poll.h> +#include <linux/wait.h> +#include <linux/anon_inodes.h> +#include <uapi/linux/kfd_ioctl.h> +#include "amdgpu_vm.h" +#include "kfd_priv.h" +#include "kfd_smi_events.h" + +struct kfd_smi_client { + struct list_head list; + struct kfifo fifo; + wait_queue_head_t wait_queue; + /* events enabled */ + uint64_t events; + struct kfd_dev *dev; + spinlock_t lock; +}; + +#define MAX_KFIFO_SIZE 1024 + +static __poll_t kfd_smi_ev_poll(struct file *, struct poll_table_struct *); +static ssize_t kfd_smi_ev_read(struct file *, char __user *, size_t, loff_t *); +static ssize_t kfd_smi_ev_write(struct file *, const char __user *, size_t, + loff_t *); +static int kfd_smi_ev_release(struct inode *, struct file *); + +static const char kfd_smi_name[] = "kfd_smi_ev"; + +static const struct file_operations kfd_smi_ev_fops = { + .owner = THIS_MODULE, + .poll = kfd_smi_ev_poll, + .read = kfd_smi_ev_read, + .write = kfd_smi_ev_write, + .release = kfd_smi_ev_release +}; + +static __poll_t kfd_smi_ev_poll(struct file *filep, + struct poll_table_struct *wait) +{ + __poll_t mask; + struct kfd_smi_client *client = filep->private_data; + + poll_wait(filep, &client->wait_queue, wait); + + spin_lock(&client->lock); + mask = kfifo_is_empty(&client->fifo) ? 0 : POLLIN | POLLRDNORM; + spin_unlock(&client->lock); + + return mask; +} + +static ssize_t kfd_smi_ev_read(struct file *filep, char __user *user, + size_t size, loff_t *offset) +{ + int ret; + size_t to_copy; + struct kfd_smi_client *client = filep->private_data; + unsigned char buf[MAX_KFIFO_SIZE]; + + BUILD_BUG_ON(MAX_KFIFO_SIZE > 1024); + + /* kfifo_to_user can sleep so we can't use spinlock protection around + * it. Instead, we kfifo out as spinlocked then copy them to the user.
+ */ + spin_lock(&client->lock); + to_copy = kfifo_len(&client->fifo); + if (!to_copy) { + spin_unlock(&client->lock); + return -EAGAIN; + } + to_copy = min3(size, sizeof(buf), to_copy); + ret = kfifo_out(&client->fifo, buf, to_copy); + spin_unlock(&client->lock); + if (ret <= 0) + return -EAGAIN; + + ret = copy_to_user(user, buf, to_copy); + if (ret) + return -EFAULT; + + return to_copy; +} + +static ssize_t kfd_smi_ev_write(struct file *filep, const char __user *user, + size_t size, loff_t *offset) +{ + struct kfd_smi_client *client = filep->private_data; + uint64_t events; + + if (!access_ok(user, size) || size < sizeof(events)) + return -EFAULT; + if (copy_from_user(&events, user, sizeof(events))) + return -EFAULT; + + WRITE_ONCE(client->events, events); + + return sizeof(events); +} + +static int kfd_smi_ev_release(struct inode *inode, struct file *filep) +{ + struct kfd_smi_client *client = filep->private_data; + struct kfd_dev *dev = client->dev; + + spin_lock(&dev->smi_lock); + list_del_rcu(&client->list); + spin_unlock(&dev->smi_lock); + + synchronize_rcu(); + kfifo_free(&client->fifo); + kfree(client); + + return 0; +} + +void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd; + struct amdgpu_task_info task_info; + /* VmFault msg = (hex)uint32_pid(8) + :(1) + task name(16) = 25 */ + /* 16 bytes event + 1 byte space + 25 bytes msg + 1 byte \n = 43 + */ + char fifo_in[43]; + struct kfd_smi_client *client; + int len; + + if (list_empty(&dev->smi_clients)) + return; + + memset(&task_info, 0, sizeof(struct amdgpu_task_info)); + amdgpu_vm_get_task_info(adev, pasid, &task_info); + /* Report VM faults from user applications, not retry from kernel */ + if (!task_info.pid) + return; + + len = snprintf(fifo_in, 43, "%x %x:%s\n", KFD_SMI_EVENT_VMFAULT, + task_info.pid, task_info.task_name); + + rcu_read_lock(); + + list_for_each_entry_rcu(client, &dev->smi_clients, list) { + if (!(READ_ONCE(client->events) & KFD_SMI_EVENT_VMFAULT)) + continue; + spin_lock(&client->lock); + if (kfifo_avail(&client->fifo) >= len) { + kfifo_in(&client->fifo, fifo_in, len); + wake_up_all(&client->wait_queue); + } + else + pr_debug("smi_event(vmfault): no space left\n"); + spin_unlock(&client->lock); + } + + rcu_read_unlock(); +} + +int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd) +{ + struct kfd_smi_client *client; + int ret; + + client = kzalloc(sizeof(struct kfd_smi_client), GFP_KERNEL); + if (!client) + return -ENOMEM; + INIT_LIST_HEAD(&client->list); + + ret = kfifo_alloc(&client->fifo, MAX_KFIFO_SIZE, GFP_KERNEL); + if (ret) { + kfree(client); + return ret; + } + + ret = anon_inode_getfd(kfd_smi_name, &kfd_smi_ev_fops, (void *)client, + O_RDWR); + if (ret < 0) { + kfifo_free(&client->fifo); + kfree(client); + return ret; + } + *fd = ret; + + init_waitqueue_head(&client->wait_queue); + spin_lock_init(&client->lock); + client->events = 0; + client->dev = dev; + + spin_lock(&dev->smi_lock); + list_add_rcu(&client->list, &dev->smi_clients); + spin_unlock(&dev->smi_lock); + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h new file mode 100644 index 0000000000000..a9cb218fef96e --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h @@ -0,0 +1,29 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef KFD_SMI_EVENTS_H_INCLUDED +#define KFD_SMI_EVENTS_H_INCLUDED + +int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd); +void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid); + +#endif diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 8751216bc6ec1..a149cb0ab02c9 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -133,10 +133,17 @@ static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo, * VM_FAULT_RETRY if blocking wait. * VM_FAULT_NOPAGE if blocking wait and retrying was not allowed. */ +#ifndef HAVE_VM_FAULT_ADDRESS_VMA vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, struct vm_fault *vmf, struct vm_area_struct *vma) { +#else +vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, + struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; +#endif /* * Work around locking order reversal in fault / nopfn * between mmap_sem and bo_reserve: Perform a trylock operation @@ -187,11 +194,19 @@ EXPORT_SYMBOL(ttm_bo_vm_reserve); * VM_FAULT_OOM on out-of-memory * VM_FAULT_RETRY if retryable wait */ +#ifndef HAVE_VM_FAULT_ADDRESS_VMA vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, struct vm_area_struct *vma, pgprot_t prot, pgoff_t num_prefault) { +#else +vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, + pgprot_t prot, + pgoff_t num_prefault) +{ + struct vm_area_struct *vma = vmf->vma; +#endif struct ttm_buffer_object *bo = vma->vm_private_data; struct ttm_bo_device *bdev = bo->bdev; unsigned long page_offset; @@ -353,12 +368,20 @@ vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) struct ttm_buffer_object *bo = vma->vm_private_data; vm_fault_t ret; +#ifndef HAVE_VM_FAULT_ADDRESS_VMA ret = ttm_bo_vm_reserve(bo, vmf, vma); +#else + ret = ttm_bo_vm_reserve(bo, vmf); +#endif if (ret) return ret; prot = vma->vm_page_prot; +#ifndef HAVE_VM_FAULT_ADDRESS_VMA ret = ttm_bo_vm_fault_reserved(vmf, vma, prot, TTM_BO_VM_NUM_PREFAULT); +#else + ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT); +#endif #ifdef FAULT_FLAG_RETRY_NOWAIT if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) #else diff --git a/include/drm/amd_rdma.h b/include/drm/amd_rdma.h index b0cab3c2b03c6..8dd8464f2d757 100644 --- a/include/drm/amd_rdma.h +++ b/include/drm/amd_rdma.h @@ -45,6 +45,9 @@ struct amd_p2p_info { void *priv; /**< Pointer set by AMD kernel * driver */ + 
struct kfd_process *kfd_proc; /**< Reference to kfd_process that + * corresponds to this P2P access + */ }; /** diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index d017de27361eb..04cf3a0c27aa3 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -44,6 +44,7 @@ #ifndef HAVE_CONFIG_H #define HAVE_DRM_GEM_OBJECT_RESV 1 +#define HAVE_VM_FAULT_ADDRESS_VMA 1 #endif struct ttm_bo_global; @@ -740,6 +741,7 @@ static inline bool ttm_bo_uses_embedded_gem_object(struct ttm_buffer_object *bo) /* Default number of pre-faulted pages in the TTM fault handler */ #define TTM_BO_VM_NUM_PREFAULT 16 +#ifndef HAVE_VM_FAULT_ADDRESS_VMA vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, struct vm_fault *vmf, struct vm_area_struct *vma); @@ -749,6 +751,16 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, pgprot_t prot, pgoff_t num_prefault); +#else +vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, + struct vm_fault *vmf); + +vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, + pgprot_t prot, + pgoff_t num_prefault); + +#endif + #if defined(HAVE_VM_OPERATIONS_STRUCT_FAULT_2ARG) vm_fault_t ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf); #else diff --git a/include/kcl/kcl_drm.h b/include/kcl/kcl_drm.h index ac9b91e55d3fa..831416cb0fe8b 100644 --- a/include/kcl/kcl_drm.h +++ b/include/kcl/kcl_drm.h @@ -224,6 +224,12 @@ drm_gem_object_put_unlocked(struct drm_gem_object *obj) { return drm_gem_object_unreference_unlocked(obj); } + +static inline void +drm_gem_object_get(struct drm_gem_object *obj) +{ + kref_get(&obj->refcount); +} #endif #if !defined(HAVE_DRM_IS_CURRENT_MASTER) diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 292fc1abe27f6..e5e319bb6c5b4 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -26,8 +26,12 @@ #include #include +/* + * - 1.1 - initial version + * - 1.3 - Add SMI events support + */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 2 +#define KFD_IOCTL_MINOR_VERSION 3 /* * Debug revision change log @@ -40,9 +44,11 @@ * 1.3 - Fix race condition between clear on suspend and trap event handling * 1.4 - Fix bad kfifo free * 1.5 - Fix ABA issue between queue snapshot and suspend + * 2.0 - Return number of queues suspended/resumed and mask invalid/error + * array slots */ -#define KFD_IOCTL_DBG_MAJOR_VERSION 1 -#define KFD_IOCTL_DBG_MINOR_VERSION 5 +#define KFD_IOCTL_DBG_MAJOR_VERSION 2 +#define KFD_IOCTL_DBG_MINOR_VERSION 0 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ @@ -224,6 +230,12 @@ struct kfd_ioctl_dbg_wave_control_args { #define KFD_DBG_EV_STATUS_NEW_QUEUE 8 #define KFD_DBG_EV_FLAG_CLEAR_STATUS 1 +/* queue states for suspend/resume */ +#define KFD_DBG_QUEUE_ERROR_BIT 30 +#define KFD_DBG_QUEUE_INVALID_BIT 31 +#define KFD_DBG_QUEUE_ERROR_MASK (1 << KFD_DBG_QUEUE_ERROR_BIT) +#define KFD_DBG_QUEUE_INVALID_MASK (1 << KFD_DBG_QUEUE_INVALID_BIT) + #define KFD_INVALID_QUEUEID 0xffffffff /* KFD_IOC_DBG_TRAP_ENABLE: @@ -251,18 +263,44 @@ struct kfd_ioctl_dbg_wave_control_args { #define KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE 2 /* KFD_IOC_DBG_TRAP_NODE_SUSPEND: - * ptr: pointer to an array of Queues IDs - * data1: flags - * data2: number of queues - * data3: grace period + * ptr: pointer to an array of Queues IDs (IN/OUT) + * data1: flags (IN) + * data2: number of queues (IN) + * data3: grace period (IN) + * + * Returns the number of queues suspended from array of Queue IDs (ptr). 
+ * Requested queues that fail to suspend are masked in the array: + * + * KFD_DBG_QUEUE_INVALID_MASK - requested queue does not exist or cannot be + * suspended (new or being destroyed). + * + * KFD_DBG_QUEUE_ERROR_MASK - bad internal operation occurred on requested + * queue. + * + * NOTE! All queue destroy requests will be blocked on a suspended queue. + * Queue resume will unblock. + * + * KFD_DBG_EV_FLAG_CLEAR_STATUS can be passed as a flag (data1) to clear + * pending events. + * + * Grace period (data3) is time allowed for waves to complete before CWSR. + * 0 can be entered for immediate preemption. */ #define KFD_IOC_DBG_TRAP_NODE_SUSPEND 3 /* KFD_IOC_DBG_TRAP_NODE_RESUME: - * ptr: pointer to an array of Queues IDs - * data1: flags - * data2: number of queues - * data3: unused + * ptr: pointer to an array of Queues IDs (IN/OUT) + * data1: flags (IN) + * data2: number of queues (IN) + * data3: unused (IN) + * + * Returns the number of queues resumed from array of Queue IDs (ptr). + * Requested queues that fail to resume are masked in the array: + * + * KFD_DBG_QUEUE_INVALID_MASK - requested queue does not exist. + * + * KFD_DBG_QUEUE_ERROR_MASK - bad internal operation occurred on requested + * queue. */ #define KFD_IOC_DBG_TRAP_NODE_RESUME 4 @@ -555,6 +593,17 @@ struct kfd_ioctl_import_dmabuf_args { __u32 dmabuf_fd; /* to KFD */ }; +/* + * KFD SMI(System Management Interface) events + */ +/* Event type (defined by bitmask) */ +#define KFD_SMI_EVENT_VMFAULT 0x0000000000000001 + +struct kfd_ioctl_smi_events_args { + __u32 gpuid; /* to KFD */ + __u32 anon_fd; /* from KFD */ +}; + /* Register offset inside the remapped mmio page */ enum kfd_mmio_remap { @@ -706,19 +755,26 @@ struct kfd_ioctl_cross_memory_copy_args { #define AMDKFD_IOC_ALLOC_QUEUE_GWS \ AMDKFD_IOWR(0x1E, struct kfd_ioctl_alloc_queue_gws_args) +#define AMDKFD_IOC_SMI_EVENTS \ + AMDKFD_IOWR(0x1F, struct kfd_ioctl_smi_events_args) + +#define AMDKFD_COMMAND_START 0x01 +#define AMDKFD_COMMAND_END 0x20 + +/* non-upstream ioctls */ #define AMDKFD_IOC_IPC_IMPORT_HANDLE \ - AMDKFD_IOWR(0x1F, struct kfd_ioctl_ipc_import_handle_args) + AMDKFD_IOWR(0x80, struct kfd_ioctl_ipc_import_handle_args) #define AMDKFD_IOC_IPC_EXPORT_HANDLE \ - AMDKFD_IOWR(0x20, struct kfd_ioctl_ipc_export_handle_args) + AMDKFD_IOWR(0x81, struct kfd_ioctl_ipc_export_handle_args) #define AMDKFD_IOC_DBG_TRAP \ - AMDKFD_IOWR(0x21, struct kfd_ioctl_dbg_trap_args) + AMDKFD_IOWR(0x82, struct kfd_ioctl_dbg_trap_args) #define AMDKFD_IOC_CROSS_MEMORY_COPY \ - AMDKFD_IOWR(0x22, struct kfd_ioctl_cross_memory_copy_args) + AMDKFD_IOWR(0x83, struct kfd_ioctl_cross_memory_copy_args) -#define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x23 +#define AMDKFD_COMMAND_START_2 0x80 +#define AMDKFD_COMMAND_END_2 0x84 #endif
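Note on the debug API 2.0 change above: KFD_IOC_DBG_TRAP_NODE_SUSPEND and KFD_IOC_DBG_TRAP_NODE_RESUME now return the number of queues processed and write status masks back into the caller's queue-ID array instead of rolling back on partial failure. A minimal user-space sketch of decoding that array follows; only the mask macros mirror the header above, the helper name and the reporting are illustrative.

	#include <stdint.h>
	#include <stdio.h>

	/* Mask bits mirrored from the updated kfd_ioctl.h */
	#define KFD_DBG_QUEUE_ERROR_BIT		30
	#define KFD_DBG_QUEUE_INVALID_BIT	31
	#define KFD_DBG_QUEUE_ERROR_MASK	(1U << KFD_DBG_QUEUE_ERROR_BIT)
	#define KFD_DBG_QUEUE_INVALID_MASK	(1U << KFD_DBG_QUEUE_INVALID_BIT)

	/* 'ret' is the ioctl return value: the number of queues actually
	 * suspended/resumed, or a negative errno. Failed queue IDs are
	 * masked in place in the array, not removed from it.
	 */
	static void report_queue_results(int ret, const uint32_t *queue_ids,
					 uint32_t num_queues)
	{
		uint32_t i;

		if (ret < 0) {
			fprintf(stderr, "dbg_trap ioctl failed: %d\n", ret);
			return;
		}

		printf("%d of %u queues processed\n", ret, num_queues);

		for (i = 0; i < num_queues; i++) {
			uint32_t qid = queue_ids[i] &
				~(KFD_DBG_QUEUE_ERROR_MASK |
				  KFD_DBG_QUEUE_INVALID_MASK);

			if (queue_ids[i] & KFD_DBG_QUEUE_ERROR_MASK)
				printf("queue %u: internal error\n", qid);
			else if (queue_ids[i] & KFD_DBG_QUEUE_INVALID_MASK)
				printf("queue %u: not found or not eligible\n",
				       qid);
			else
				printf("queue %u: OK\n", qid);
		}
	}

Since total_suspended/total_resumed replace the old all-or-nothing rollback, a debugger can now see exactly which queues succeeded rather than getting a blanket -EINVAL.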
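For the new SMI-events interface, the flow is: ask AMDKFD_IOC_SMI_EVENTS for an anonymous file descriptor, write() a 64-bit mask of the events to watch (KFD_SMI_EVENT_VMFAULT here), then poll() and read() text records of the form "<event> <pid>:<task name>\n" (hex fields) from the per-client kfifo. A rough sketch, assuming kfd_fd is an already-open /dev/kfd handle and gpuid came from the usual topology query; error handling is abbreviated.

	#include <poll.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include <linux/kfd_ioctl.h>

	static int watch_vm_faults(int kfd_fd, uint32_t gpuid)
	{
		struct kfd_ioctl_smi_events_args args = { .gpuid = gpuid };
		uint64_t events = KFD_SMI_EVENT_VMFAULT;
		struct pollfd pfd;
		char buf[1024];	/* MAX_KFIFO_SIZE on the kernel side */
		ssize_t len;

		if (ioctl(kfd_fd, AMDKFD_IOC_SMI_EVENTS, &args) < 0)
			return -1;

		/* The write() selects which events this client receives */
		if (write(args.anon_fd, &events, sizeof(events)) !=
		    sizeof(events)) {
			close(args.anon_fd);
			return -1;
		}

		pfd.fd = args.anon_fd;
		pfd.events = POLLIN;

		while (poll(&pfd, 1, -1) > 0) {
			len = read(args.anon_fd, buf, sizeof(buf) - 1);
			if (len <= 0)	/* empty fifo reads fail with EAGAIN */
				continue;
			buf[len] = '\0';
			printf("SMI event: %s", buf);
		}

		close(args.anon_fd);
		return 0;
	}

On the kernel side a record is dropped, with a pr_debug, when a client's fifo is full, so a slow reader only loses its own events and never blocks the interrupt path.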