Skip to content

Commit

Permalink
drm/amdkfd: Track unified memory when switching xnack mode
Browse files Browse the repository at this point in the history
Unified memory usage with xnack off is tracked to avoid oversubscribe
system memory, with xnack on, we don't track unified memory usage to
allow memory oversubscribe. When switching xnack mode from off to on,
subsequent free ranges allocated with xnack off will not unreserve
memory. When switching xnack mode from on to off, subsequent free ranges
allocated with xnack on will unreserve memory. Both cases cause memory
accounting unbalanced.

When switching xnack mode from on to off, need reserve already allocated
svm range memory. When switching xnack mode from off to on, need
unreserve already allocated svm range memory.

v6: Take prange lock to access range child list
v5: Handle prange child ranges
v4: Handle reservation memory failure
v3: Handle switching xnack mode race with svm_range_deferred_list_work
v2: Handle both switching xnack from on to off and from off to on cases

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
  • Loading branch information
Philip Yang authored and Alex Deucher committed Sep 30, 2022
1 parent 730548b commit 8a7c3ce
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 7 deletions.
26 changes: 20 additions & 6 deletions drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
Original file line number Diff line number Diff line change
Expand Up @@ -1584,6 +1584,8 @@ static int kfd_ioctl_smi_events(struct file *filep,
return kfd_smi_event_open(pdd->dev, &args->anon_fd);
}

#if IS_ENABLED(CONFIG_HSA_AMD_SVM)

static int kfd_ioctl_set_xnack_mode(struct file *filep,
struct kfd_process *p, void *data)
{
Expand All @@ -1594,22 +1596,29 @@ static int kfd_ioctl_set_xnack_mode(struct file *filep,
if (args->xnack_enabled >= 0) {
if (!list_empty(&p->pqm.queues)) {
pr_debug("Process has user queues running\n");
mutex_unlock(&p->mutex);
return -EBUSY;
r = -EBUSY;
goto out_unlock;
}
if (args->xnack_enabled && !kfd_process_xnack_mode(p, true))

if (p->xnack_enabled == args->xnack_enabled)
goto out_unlock;

if (args->xnack_enabled && !kfd_process_xnack_mode(p, true)) {
r = -EPERM;
else
p->xnack_enabled = args->xnack_enabled;
goto out_unlock;
}

r = svm_range_switch_xnack_reserve_mem(p, args->xnack_enabled);
} else {
args->xnack_enabled = p->xnack_enabled;
}

out_unlock:
mutex_unlock(&p->mutex);

return r;
}

#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
{
struct kfd_ioctl_svm_args *args = data;
Expand All @@ -1629,6 +1638,11 @@ static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
return r;
}
#else
static int kfd_ioctl_set_xnack_mode(struct file *filep,
struct kfd_process *p, void *data)
{
return -EPERM;
}
static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
{
return -EPERM;
Expand Down
60 changes: 59 additions & 1 deletion drivers/gpu/drm/amd/amdkfd/kfd_svm.c
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ static void svm_range_free(struct svm_range *prange, bool update_mem_usage)
svm_range_free_dma_mappings(prange);

if (update_mem_usage && !p->xnack_enabled) {
pr_debug("unreserve mem limit: %lld\n", size);
pr_debug("unreserve prange 0x%p size: 0x%llx\n", prange, size);
amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
}
Expand Down Expand Up @@ -2956,6 +2956,64 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
return r;
}

int
svm_range_switch_xnack_reserve_mem(struct kfd_process *p, bool xnack_enabled)
{
struct svm_range *prange, *pchild;
uint64_t reserved_size = 0;
uint64_t size;
int r = 0;

pr_debug("switching xnack from %d to %d\n", p->xnack_enabled, xnack_enabled);

mutex_lock(&p->svms.lock);

list_for_each_entry(prange, &p->svms.list, list) {
svm_range_lock(prange);
list_for_each_entry(pchild, &prange->child_list, child_list) {
size = (pchild->last - pchild->start + 1) << PAGE_SHIFT;
if (xnack_enabled) {
amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
} else {
r = amdgpu_amdkfd_reserve_mem_limit(NULL, size,
KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
if (r)
goto out_unlock;
reserved_size += size;
}
}

size = (prange->last - prange->start + 1) << PAGE_SHIFT;
if (xnack_enabled) {
amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
} else {
r = amdgpu_amdkfd_reserve_mem_limit(NULL, size,
KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
if (r)
goto out_unlock;
reserved_size += size;
}
out_unlock:
svm_range_unlock(prange);
if (r)
break;
}

if (r)
amdgpu_amdkfd_unreserve_mem_limit(NULL, reserved_size,
KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
else
/* Change xnack mode must be inside svms lock, to avoid race with
* svm_range_deferred_list_work unreserve memory in parallel.
*/
p->xnack_enabled = xnack_enabled;

mutex_unlock(&p->svms.lock);
return r;
}

void svm_range_list_fini(struct kfd_process *p)
{
struct svm_range *prange;
Expand Down
1 change: 1 addition & 0 deletions drivers/gpu/drm/amd/amdkfd/kfd_svm.h
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ void svm_range_list_lock_and_flush_work(struct svm_range_list *svms, struct mm_s
void svm_range_bo_unref_async(struct svm_range_bo *svm_bo);

void svm_range_set_max_pages(struct amdgpu_device *adev);
int svm_range_switch_xnack_reserve_mem(struct kfd_process *p, bool xnack_enabled);

#else

Expand Down

0 comments on commit 8a7c3ce

Please sign in to comment.