Skip to content

Commit

Permalink
drm/amdgpu: recover gart table at resume
Browse files Browse the repository at this point in the history
Get rid off pin/unpin of gart BO at resume/suspend and
instead pin only once and try to recover gart content
at resume time. This is much more stable in case there
is OOM situation at 2nd call to amdgpu_device_evict_resources()
while evicting GART table.

v3: remove gart recovery from other places
v2: pin gart at amdgpu_gart_table_vram_alloc()

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
  • Loading branch information
Nirmoy Das authored and Alex Deucher committed Jan 11, 2022
1 parent ec6aae9 commit 575e55e
Show file tree
Hide file tree
Showing 7 changed files with 11 additions and 99 deletions.
11 changes: 0 additions & 11 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -4016,16 +4016,11 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
if (!adev->in_s0ix)
amdgpu_amdkfd_suspend(adev, adev->in_runpm);

/* First evict vram memory */
amdgpu_device_evict_resources(adev);

amdgpu_fence_driver_hw_fini(adev);

amdgpu_device_ip_suspend_phase2(adev);
/* This second call to evict device resources is to evict
* the gart page table using the CPU.
*/
amdgpu_device_evict_resources(adev);

return 0;
}
Expand Down Expand Up @@ -4370,8 +4365,6 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
goto error;

amdgpu_virt_init_data_exchange(adev);
/* we need recover gart prior to run SMC/CP/SDMA resume */
amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);

r = amdgpu_device_fw_loading(adev);
if (r)
Expand Down Expand Up @@ -4691,10 +4684,6 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
amdgpu_inc_vram_lost(tmp_adev);
}

r = amdgpu_gtt_mgr_recover(&tmp_adev->mman.gtt_mgr);
if (r)
goto out;

r = amdgpu_device_fw_loading(tmp_adev);
if (r)
return r;
Expand Down
84 changes: 6 additions & 78 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,80 +114,12 @@ void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
*/
int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
{
int r;

if (adev->gart.bo == NULL) {
struct amdgpu_bo_param bp;

memset(&bp, 0, sizeof(bp));
bp.size = adev->gart.table_size;
bp.byte_align = PAGE_SIZE;
bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
bp.type = ttm_bo_type_kernel;
bp.resv = NULL;
bp.bo_ptr_size = sizeof(struct amdgpu_bo);

r = amdgpu_bo_create(adev, &bp, &adev->gart.bo);
if (r) {
return r;
}
}
return 0;
}

/**
* amdgpu_gart_table_vram_pin - pin gart page table in vram
*
* @adev: amdgpu_device pointer
*
* Pin the GART page table in vram so it will not be moved
* by the memory manager (pcie r4xx, r5xx+). These asics require the
* gart table to be in video memory.
* Returns 0 for success, error for failure.
*/
int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev)
{
int r;

r = amdgpu_bo_reserve(adev->gart.bo, false);
if (unlikely(r != 0))
return r;
r = amdgpu_bo_pin(adev->gart.bo, AMDGPU_GEM_DOMAIN_VRAM);
if (r) {
amdgpu_bo_unreserve(adev->gart.bo);
return r;
}
r = amdgpu_bo_kmap(adev->gart.bo, &adev->gart.ptr);
if (r)
amdgpu_bo_unpin(adev->gart.bo);
amdgpu_bo_unreserve(adev->gart.bo);
return r;
}

/**
* amdgpu_gart_table_vram_unpin - unpin gart page table in vram
*
* @adev: amdgpu_device pointer
*
* Unpin the GART page table in vram (pcie r4xx, r5xx+).
* These asics require the gart table to be in video memory.
*/
void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev)
{
int r;
if (adev->gart.bo != NULL)
return 0;

if (adev->gart.bo == NULL) {
return;
}
r = amdgpu_bo_reserve(adev->gart.bo, true);
if (likely(r == 0)) {
amdgpu_bo_kunmap(adev->gart.bo);
amdgpu_bo_unpin(adev->gart.bo);
amdgpu_bo_unreserve(adev->gart.bo);
adev->gart.ptr = NULL;
}
return amdgpu_bo_create_kernel(adev, adev->gart.table_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM, &adev->gart.bo,
NULL, (void *)&adev->gart.ptr);
}

/**
Expand All @@ -201,11 +133,7 @@ void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev)
*/
void amdgpu_gart_table_vram_free(struct amdgpu_device *adev)
{
if (adev->gart.bo == NULL) {
return;
}
amdgpu_bo_unref(&adev->gart.bo);
adev->gart.ptr = NULL;
amdgpu_bo_free_kernel(&adev->gart.bo, NULL, (void *)&adev->gart.ptr);
}

/*
Expand Down
3 changes: 1 addition & 2 deletions drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
Original file line number Diff line number Diff line change
Expand Up @@ -989,7 +989,7 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)
if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev))
goto skip_pin_bo;

r = amdgpu_gart_table_vram_pin(adev);
r = amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
if (r)
return r;

Expand Down Expand Up @@ -1060,7 +1060,6 @@ static void gmc_v10_0_gart_disable(struct amdgpu_device *adev)
{
adev->gfxhub.funcs->gart_disable(adev);
adev->mmhub.funcs->gart_disable(adev);
amdgpu_gart_table_vram_unpin(adev);
}

static int gmc_v10_0_hw_fini(void *handle)
Expand Down
3 changes: 1 addition & 2 deletions drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
return -EINVAL;
}
r = amdgpu_gart_table_vram_pin(adev);
r = amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
if (r)
return r;

Expand Down Expand Up @@ -608,7 +608,6 @@ static void gmc_v6_0_gart_disable(struct amdgpu_device *adev)
WREG32(mmVM_L2_CNTL3,
VM_L2_CNTL3__L2_CACHE_BIGK_ASSOCIATIVITY_MASK |
(0UL << VM_L2_CNTL3__L2_CACHE_BIGK_FRAGMENT_SIZE__SHIFT));
amdgpu_gart_table_vram_unpin(adev);
}

static void gmc_v6_0_vm_decode_fault(struct amdgpu_device *adev,
Expand Down
3 changes: 1 addition & 2 deletions drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
Original file line number Diff line number Diff line change
Expand Up @@ -620,7 +620,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
return -EINVAL;
}
r = amdgpu_gart_table_vram_pin(adev);
r = amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
if (r)
return r;

Expand Down Expand Up @@ -758,7 +758,6 @@ static void gmc_v7_0_gart_disable(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
WREG32(mmVM_L2_CNTL, tmp);
WREG32(mmVM_L2_CNTL2, 0);
amdgpu_gart_table_vram_unpin(adev);
}

/**
Expand Down
3 changes: 1 addition & 2 deletions drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
Original file line number Diff line number Diff line change
Expand Up @@ -844,7 +844,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
return -EINVAL;
}
r = amdgpu_gart_table_vram_pin(adev);
r = amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
if (r)
return r;

Expand Down Expand Up @@ -999,7 +999,6 @@ static void gmc_v8_0_gart_disable(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
WREG32(mmVM_L2_CNTL, tmp);
WREG32(mmVM_L2_CNTL2, 0);
amdgpu_gart_table_vram_unpin(adev);
}

/**
Expand Down
3 changes: 1 addition & 2 deletions drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
Original file line number Diff line number Diff line change
Expand Up @@ -1743,7 +1743,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev))
goto skip_pin_bo;

r = amdgpu_gart_table_vram_pin(adev);
r = amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
if (r)
return r;

Expand Down Expand Up @@ -1821,7 +1821,6 @@ static void gmc_v9_0_gart_disable(struct amdgpu_device *adev)
{
adev->gfxhub.funcs->gart_disable(adev);
adev->mmhub.funcs->gart_disable(adev);
amdgpu_gart_table_vram_unpin(adev);
}

static int gmc_v9_0_hw_fini(void *handle)
Expand Down

0 comments on commit 575e55e

Please sign in to comment.