Skip to content

Commit

Permalink
drm/amdkfd: Improve process termination handling
Browse files Browse the repository at this point in the history
Separate device queue termination from process queue manager
termination. Unmap all queues at once instead of one at a time.
Unmap device queues before the PASID is unbound, in the
kfd_process_iommu_unbind_callback.

When resetting wavefronts in non-HWS mode, do it before the VMID is
released.

Signed-off-by: Ben Goz <ben.goz@amd.com>
Signed-off-by: shaoyun liu <shaoyun.liu@amd.com>
Signed-off-by: Amber Lin <Amber.Lin@amd.com>
Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
  • Loading branch information
Felix Kuehling authored and Oded Gabbay committed Sep 27, 2017
1 parent c4744e2 commit 9fd3f1b
Show file tree
Hide file tree
Showing 5 changed files with 200 additions and 81 deletions.
185 changes: 147 additions & 38 deletions drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
Original file line number Diff line number Diff line change
Expand Up @@ -296,65 +296,73 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
return retval;
}

static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
 * to avoid unsynchronized concurrent access
 */
static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
struct queue *q)
{
int retval;
struct mqd_manager *mqd;

retval = 0;

mutex_lock(&dqm->lock);
mqd = dqm->ops.get_mqd_manager(dqm,
get_mqd_type_from_queue_type(q->properties.type));
if (!mqd)
return -ENOMEM;

if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
if (mqd == NULL) {
retval = -ENOMEM;
goto out;
}
deallocate_hqd(dqm, q);
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
if (mqd == NULL) {
retval = -ENOMEM;
goto out;
}
dqm->sdma_queue_count--;
deallocate_sdma_queue(dqm, q->sdma_id);
} else {
pr_debug("q->properties.type %d is invalid\n",
q->properties.type);
retval = -EINVAL;
goto out;
return -EINVAL;
}
dqm->total_queue_count--;

retval = mqd->destroy_mqd(mqd, q->mqd,
KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
KFD_UNMAP_LATENCY_MS,
q->pipe, q->queue);

if (retval)
goto out;
if (retval == -ETIME)
qpd->reset_wavefronts = true;

mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

list_del(&q->list);
if (list_empty(&qpd->queues_list))
if (list_empty(&qpd->queues_list)) {
if (qpd->reset_wavefronts) {
pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
dqm->dev);
/* dbgdev_wave_reset_wavefronts has to be called before
* deallocate_vmid(), i.e. when vmid is still in use.
*/
dbgdev_wave_reset_wavefronts(dqm->dev,
qpd->pqm->process);
qpd->reset_wavefronts = false;
}

deallocate_vmid(dqm, qpd, q);
}
if (q->properties.is_active)
dqm->queue_count--;

/*
* Unconditionally decrement this counter, regardless of the queue's
* type
*/
dqm->total_queue_count--;
pr_debug("Total of %d queues are accountable so far\n",
dqm->total_queue_count);
return retval;
}

out:
/* Destroy a single queue in non-HWS mode.
 *
 * Thin locking wrapper: serializes against other DQM operations by
 * taking dqm->lock around the _locked variant, which does the real work.
 * Returns whatever the locked helper returns (0 on success, negative
 * errno on failure).
 */
static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int ret;

	mutex_lock(&dqm->lock);
	ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
	mutex_unlock(&dqm->lock);

	return ret;
}

Expand Down Expand Up @@ -921,10 +929,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
enum kfd_unmap_queues_filter filter,
uint32_t filter_param)
{
int retval;
struct kfd_process_device *pdd;

retval = 0;
int retval = 0;

if (!dqm->active_runlist)
return retval;
Expand All @@ -948,12 +953,9 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
/* should be timed out */
retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
if (retval) {
pdd = kfd_get_process_device_data(dqm->dev,
kfd_get_process(current));
pdd->reset_wavefronts = true;
if (retval)
return retval;
}

pm_release_ib(&dqm->packets);
dqm->active_runlist = false;

Expand Down Expand Up @@ -1015,7 +1017,10 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
if (q->properties.is_active)
dqm->queue_count--;

execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
retval = execute_queues_cpsch(dqm,
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
if (retval == -ETIME)
qpd->reset_wavefronts = true;

mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

Expand Down Expand Up @@ -1105,6 +1110,108 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
return retval;
}

/* Tear down all queues of a terminating process in non-HWS mode and
 * unregister the process from this device queue manager.
 *
 * Destroys every remaining user mode queue of @qpd, then removes the
 * process's registration node from dqm->queues. Runs entirely under
 * dqm->lock. Returns the last error seen while destroying queues, or 0.
 */
static int process_termination_nocpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	struct queue *q, *tmp_q;
	struct device_process_node *node, *tmp_node;
	int retval = 0;

	mutex_lock(&dqm->lock);

	/* Destroy every remaining user mode queue. Keep going on failure
	 * so all queues get a teardown attempt; report the last error.
	 */
	list_for_each_entry_safe(q, tmp_q, &qpd->queues_list, list) {
		int err = destroy_queue_nocpsch_locked(dqm, qpd, q);

		if (err)
			retval = err;
	}

	/* Drop this process's registration node, if it is still listed */
	list_for_each_entry_safe(node, tmp_node, &dqm->queues, list) {
		if (node->qpd != qpd)
			continue;
		list_del(&node->list);
		kfree(node);
		dqm->processes_count--;
		break;
	}

	mutex_unlock(&dqm->lock);
	return retval;
}


/* Tear down all queues of a terminating process in HWS (CP scheduling)
 * mode: remove kernel queues, account out all user mode queues,
 * unregister the process, unmap everything from the HW in one call,
 * then free MQD resources.
 *
 * NOTE(review): per the commit intent, this runs before the PASID is
 * unbound so wavefront reset can still reach the process — confirm
 * against the caller.
 */
static int process_termination_cpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	int retval;
	struct queue *q, *next;
	struct kernel_queue *kq, *kq_next;
	struct mqd_manager *mqd;
	struct device_process_node *cur, *next_dpn;
	/* Default: unmap only this process's dynamic queues */
	enum kfd_unmap_queues_filter filter =
		KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;

	retval = 0;

	mutex_lock(&dqm->lock);

	/* Clean all kernel queues */
	list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
		list_del(&kq->list);
		dqm->queue_count--;
		qpd->is_debug = false;
		dqm->total_queue_count--;
		/* A kernel (debug) queue existed: widen the unmap below to
		 * all queues, not just this process's dynamic ones.
		 */
		filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
	}

	/* Clear all user mode queues: only fix up the bookkeeping here;
	 * the actual unmap happens in one execute_queues_cpsch() call
	 * below, and the MQDs are freed at the end.
	 */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
			dqm->sdma_queue_count--;

		if (q->properties.is_active)
			dqm->queue_count--;

		dqm->total_queue_count--;
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			break;
		}
	}

	/* Unmap all queues at once; on failure (e.g. preemption timeout)
	 * or a previously flagged timeout, reset the wavefronts while the
	 * device/process association is still valid.
	 */
	retval = execute_queues_cpsch(dqm, filter, 0);
	if (retval || qpd->reset_wavefronts) {
		pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
		dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
		qpd->reset_wavefronts = false;
	}

	/* lastly, free mqd resources */
	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
		mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
		if (!mqd) {
			retval = -ENOMEM;
			goto out;
		}
		list_del(&q->list);
		mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
	}

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
{
struct device_queue_manager *dqm;
Expand Down Expand Up @@ -1133,6 +1240,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
dqm->ops.process_termination = process_termination_cpsch;
break;
case KFD_SCHED_POLICY_NO_HWS:
/* initialize dqm for no cp scheduling */
Expand All @@ -1147,6 +1255,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
dqm->ops.initialize = initialize_nocpsch;
dqm->ops.uninitialize = uninitialize;
dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
dqm->ops.process_termination = process_termination_nocpsch;
break;
default:
pr_err("Invalid scheduling policy %d\n", sched_policy);
Expand Down
5 changes: 5 additions & 0 deletions drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ struct device_process_node {
* @set_cache_memory_policy: Sets memory policy (cached/ non cached) for the
* memory apertures.
*
* @process_termination: Clears all process queues belonging to that device.
*
*/

struct device_queue_manager_ops {
Expand Down Expand Up @@ -120,6 +122,9 @@ struct device_queue_manager_ops {
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
uint64_t alternate_aperture_size);

int (*process_termination)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
};

struct device_queue_manager_asic_ops {
Expand Down
18 changes: 15 additions & 3 deletions drivers/gpu/drm/amd/amdkfd/kfd_priv.h
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,12 @@ struct qcm_process_device {
unsigned int queue_count;
unsigned int vmid;
bool is_debug;

/* This flag tells if we should reset all wavefronts on
* process termination
*/
bool reset_wavefronts;

/*
* All the memory management data should be here too
*/
Expand Down Expand Up @@ -454,6 +460,8 @@ struct kfd_process_device {
/* The device that owns this data. */
struct kfd_dev *dev;

/* The process that owns this kfd_process_device. */
struct kfd_process *process;

/* per-process-per device QCM data structure */
struct qcm_process_device qpd;
Expand All @@ -469,10 +477,12 @@ struct kfd_process_device {
/* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
enum kfd_pdd_bound bound;

/* This flag tells if we should reset all
* wavefronts on process termination
/* Flag used to tell the pdd has dequeued from the dqm.
* This is used to prevent dev->dqm->ops.process_termination() from
* being called twice when it is already called in IOMMU callback
* function.
*/
bool reset_wavefronts;
bool already_dequeued;
};

#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
Expand Down Expand Up @@ -659,6 +669,8 @@ struct process_queue_node {
struct list_head process_queue_list;
};

void kfd_process_dequeue_from_device(struct kfd_process_device *pdd);
void kfd_process_dequeue_from_all_devices(struct kfd_process *p);
int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p);
void pqm_uninit(struct process_queue_manager *pqm);
int pqm_create_queue(struct process_queue_manager *pqm,
Expand Down
Loading

0 comments on commit 9fd3f1b

Please sign in to comment.