Skip to content

Commit

Permalink
drm/amdkfd: Page aligned memory reserve size
Browse files Browse the repository at this point in the history
Use page aligned size to reserve memory usage because page aligned TTM
BO size is used to unreserve memory usage, otherwise no page aligned
size causes memory usage accounting unbalanced.

Change vram_used definition type to int64_t to be able to trigger
WARN_ONCE(adev && adev->kfd.vram_used < 0, "..."), to help debug the
accounting issue with warning and backtrace.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
  • Loading branch information
Philip Yang authored and Alex Deucher committed Jan 11, 2023
1 parent 23b02b0 commit 0c2dece
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 8 deletions.
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ struct amdgpu_amdkfd_fence {

struct amdgpu_kfd_dev {
struct kfd_dev *dev;
uint64_t vram_used;
int64_t vram_used;
uint64_t vram_used_aligned;
bool init_complete;
struct work_struct reset_work;
Expand Down
12 changes: 7 additions & 5 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
Original file line number Diff line number Diff line change
Expand Up @@ -1598,6 +1598,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct amdgpu_bo *bo;
struct drm_gem_object *gobj = NULL;
u32 domain, alloc_domain;
uint64_t aligned_size;
u64 alloc_flags;
int ret;

Expand Down Expand Up @@ -1653,22 +1654,23 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
* the memory.
*/
if ((*mem)->aql_queue)
size = size >> 1;
size >>= 1;
aligned_size = PAGE_ALIGN(size);

(*mem)->alloc_flags = flags;

amdgpu_sync_create(&(*mem)->sync);

ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, flags);
ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags);
if (ret) {
pr_debug("Insufficient memory\n");
goto err_reserve_limit;
}

pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
va, size, domain_string(alloc_domain));
va, (*mem)->aql_queue ? size << 1 : size, domain_string(alloc_domain));

ret = amdgpu_gem_object_create(adev, size, 1, alloc_domain, alloc_flags,
ret = amdgpu_gem_object_create(adev, aligned_size, 1, alloc_domain, alloc_flags,
bo_type, NULL, &gobj);
if (ret) {
pr_debug("Failed to create BO on domain %s. ret %d\n",
Expand Down Expand Up @@ -1725,7 +1727,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
/* Don't unreserve system mem limit twice */
goto err_reserve_limit;
err_bo_create:
amdgpu_amdkfd_unreserve_mem_limit(adev, size, flags);
amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags);
err_reserve_limit:
mutex_destroy(&(*mem)->lock);
if (gobj)
Expand Down
9 changes: 7 additions & 2 deletions drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
Original file line number Diff line number Diff line change
Expand Up @@ -1127,8 +1127,13 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
}

/* Update the VRAM usage count */
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + args->size);
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
uint64_t size = args->size;

if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM)
size >>= 1;
WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + PAGE_ALIGN(size));
}

mutex_unlock(&p->mutex);

Expand Down

0 comments on commit 0c2dece

Please sign in to comment.