Skip to content

Commit

Permalink
Revert "drm/amdgpu: replace get_user_pages with HMM mirror helpers"
Browse files Browse the repository at this point in the history
This reverts commit 915d3ee.

This depends on an HMM fix which is not upstream yet.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
  • Loading branch information
Alex Deucher committed Mar 28, 2019
1 parent 8944042 commit 318c3f4
Show file tree
Hide file tree
Showing 9 changed files with 278 additions and 182 deletions.
1 change: 1 addition & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ struct kgd_mem {

atomic_t invalid;
struct amdkfd_process_info *process_info;
struct page **user_pages;

struct amdgpu_sync sync;

Expand Down
95 changes: 70 additions & 25 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
Original file line number Diff line number Diff line change
Expand Up @@ -491,12 +491,28 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
goto out;
}

ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages);
/* If no restore worker is running concurrently, user_pages
* should not be allocated
*/
WARN(mem->user_pages, "Leaking user_pages array");

mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
sizeof(struct page *),
GFP_KERNEL | __GFP_ZERO);
if (!mem->user_pages) {
pr_err("%s: Failed to allocate pages array\n", __func__);
ret = -ENOMEM;
goto unregister_out;
}

ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
if (ret) {
pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
goto unregister_out;
goto free_out;
}

amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);

ret = amdgpu_bo_reserve(bo, true);
if (ret) {
pr_err("%s: Failed to reserve BO\n", __func__);
Expand All @@ -509,7 +525,11 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
amdgpu_bo_unreserve(bo);

release_out:
amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
if (ret)
release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
free_out:
kvfree(mem->user_pages);
mem->user_pages = NULL;
unregister_out:
if (ret)
amdgpu_mn_unregister(bo);
Expand Down Expand Up @@ -568,6 +588,7 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
ctx->kfd_bo.priority = 0;
ctx->kfd_bo.tv.bo = &bo->tbo;
ctx->kfd_bo.tv.num_shared = 1;
ctx->kfd_bo.user_pages = NULL;
list_add(&ctx->kfd_bo.tv.head, &ctx->list);

amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
Expand Down Expand Up @@ -631,6 +652,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
ctx->kfd_bo.priority = 0;
ctx->kfd_bo.tv.bo = &bo->tbo;
ctx->kfd_bo.tv.num_shared = 1;
ctx->kfd_bo.user_pages = NULL;
list_add(&ctx->kfd_bo.tv.head, &ctx->list);

i = 0;
Expand Down Expand Up @@ -1240,6 +1262,15 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
list_del(&bo_list_entry->head);
mutex_unlock(&process_info->lock);

/* Free user pages if necessary */
if (mem->user_pages) {
pr_debug("%s: Freeing user_pages array\n", __func__);
if (mem->user_pages[0])
release_pages(mem->user_pages,
mem->bo->tbo.ttm->num_pages);
kvfree(mem->user_pages);
}

ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
if (unlikely(ret))
return ret;
Expand Down Expand Up @@ -1713,11 +1744,25 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,

bo = mem->bo;

if (!mem->user_pages) {
mem->user_pages =
kvmalloc_array(bo->tbo.ttm->num_pages,
sizeof(struct page *),
GFP_KERNEL | __GFP_ZERO);
if (!mem->user_pages) {
pr_err("%s: Failed to allocate pages array\n",
__func__);
return -ENOMEM;
}
} else if (mem->user_pages[0]) {
release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
}

/* Get updated user pages */
ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
bo->tbo.ttm->pages);
mem->user_pages);
if (ret) {
bo->tbo.ttm->pages[0] = NULL;
mem->user_pages[0] = NULL;
pr_info("%s: Failed to get user pages: %d\n",
__func__, ret);
/* Pretend it succeeded. It will fail later
Expand All @@ -1726,6 +1771,12 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
* stalled user mode queues.
*/
}

/* Mark the BO as valid unless it was invalidated
* again concurrently
*/
if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
return -EAGAIN;
}

return 0;
Expand Down Expand Up @@ -1755,8 +1806,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
GFP_KERNEL);
if (!pd_bo_list_entries) {
pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
ret = -ENOMEM;
goto out_no_mem;
return -ENOMEM;
}

INIT_LIST_HEAD(&resv_list);
Expand All @@ -1780,7 +1830,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
WARN(!list_empty(&duplicates), "Duplicates should be empty");
if (ret)
goto out_free;
goto out;

amdgpu_sync_create(&sync);

Expand All @@ -1796,8 +1846,10 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)

bo = mem->bo;

/* Validate the BO if we got user pages */
if (bo->tbo.ttm->pages[0]) {
/* Copy pages array and validate the BO if we got user pages */
if (mem->user_pages[0]) {
amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
mem->user_pages);
amdgpu_bo_placement_from_domain(bo, mem->domain);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (ret) {
Expand All @@ -1806,16 +1858,16 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
}
}

/* Validate succeeded, now the BO owns the pages, free
* our copy of the pointer array. Put this BO back on
* the userptr_valid_list. If we need to revalidate
* it, we need to start from scratch.
*/
kvfree(mem->user_pages);
mem->user_pages = NULL;
list_move_tail(&mem->validate_list.head,
&process_info->userptr_valid_list);

/* Stop HMM track the userptr update. We dont check the return
* value for concurrent CPU page table update because we will
* reschedule the restore worker if process_info->evicted_bos
* is updated.
*/
amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);

/* Update mapping. If the BO was not validated
* (because we couldn't get user pages), this will
* clear the page table entries, which will result in
Expand Down Expand Up @@ -1845,15 +1897,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
ttm_eu_backoff_reservation(&ticket, &resv_list);
amdgpu_sync_wait(&sync, false);
amdgpu_sync_free(&sync);
out_free:
out:
kfree(pd_bo_list_entries);
out_no_mem:
list_for_each_entry_safe(mem, tmp_mem,
&process_info->userptr_inval_list,
validate_list.head) {
bo = mem->bo;
amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
}

return ret;
}
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ struct amdgpu_bo_list_entry {
struct amdgpu_bo_va *bo_va;
uint32_t priority;
struct page **user_pages;
bool user_invalidated;
int user_invalidated;
};

struct amdgpu_bo_list {
Expand Down
Loading

0 comments on commit 318c3f4

Please sign in to comment.