Skip to content

Commit

Permalink
drm/amdgpu: Modify the contiguous flags behaviour
Browse files Browse the repository at this point in the history
Now we have two flags for contiguous VRAM buffer allocation.
If the application requests AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
it would set the ttm place TTM_PL_FLAG_CONTIGUOUS flag in the
buffer's placement function.

This patch will change the default behaviour of the two flags.

When we set AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS
- This means contiguous is not mandatory.
- we will try to allocate a contiguous buffer. If the
  allocation fails, we fall back to allocating individual pages.

When we set TTM_PL_FLAG_CONTIGUOUS
- This means contiguous allocation is mandatory.
- we are setting this in amdgpu_bo_pin_restricted() before bo validation
  and check this flag in the vram manager file.
- if this is set, we should allocate the buffer pages contiguously.
  If the allocation fails, we return -ENOSPC.

v2:
  - keep the mem_flags and bo->flags check as is(Christian)
  - place the TTM_PL_FLAG_CONTIGUOUS flag setting into the
    amdgpu_bo_pin_restricted function placement range iteration
    loop(Christian)
  - rename find_pages with amdgpu_vram_mgr_calculate_pages_per_block
    (Christian)
  - Keep the kernel BO allocation as is(Christian)
  - If BO pin vram allocation failed, we need to return -ENOSPC as
    RDMA cannot work with scattered VRAM pages(Philip)

v3(Christian):
  - keep contiguous flag handling outside of pages_per_block
    calculation
  - remove the hacky implementation in contiguous flag error
    handling code

v4(Christian):
  - use any variable and return value for non-contiguous
    fallback

v5: rebase to amd-staging-drm-next branch

Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Suggested-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
  • Loading branch information
Arunpravin Paneer Selvam authored and Alex Deucher committed Apr 26, 2024
1 parent f851b07 commit e362b7c
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 7 deletions.
8 changes: 7 additions & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,10 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
else
places[c].flags |= TTM_PL_FLAG_TOPDOWN;

if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
if (abo->tbo.type == ttm_bo_type_kernel &&
flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
places[c].flags |= TTM_PL_FLAG_CONTIGUOUS;

c++;
}

Expand Down Expand Up @@ -967,6 +969,10 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
if (!bo->placements[i].lpfn ||
(lpfn && lpfn < bo->placements[i].lpfn))
bo->placements[i].lpfn = lpfn;

if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
bo->placements[i].mem_type == TTM_PL_VRAM)
bo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
}

r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
Expand Down
23 changes: 17 additions & 6 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
{
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct amdgpu_device *adev = to_amdgpu_device(mgr);
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
u64 vis_usage = 0, max_bytes, min_block_size;
struct amdgpu_vram_mgr_resource *vres;
u64 size, remaining_size, lpfn, fpfn;
Expand All @@ -468,7 +469,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
if (tbo->type != ttm_bo_type_kernel)
max_bytes -= AMDGPU_VM_RESERVED_VRAM;

if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) {
pages_per_block = ~0ul;
} else {
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
Expand All @@ -477,7 +478,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
/* default to 2MB */
pages_per_block = 2UL << (20UL - PAGE_SHIFT);
#endif
pages_per_block = max_t(uint32_t, pages_per_block,
pages_per_block = max_t(u32, pages_per_block,
tbo->page_alignment);
}

Expand All @@ -498,7 +499,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
if (place->flags & TTM_PL_FLAG_TOPDOWN)
vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;

if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;

if (fpfn || lpfn != mgr->mm.size)
Expand All @@ -514,21 +515,31 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
else
min_block_size = mgr->default_page_size;

BUG_ON(min_block_size < mm->chunk_size);

/* Limit maximum size to 2GiB due to SG table limitations */
size = min(remaining_size, 2ULL << 30);

if ((size >= (u64)pages_per_block << PAGE_SHIFT) &&
!(size & (((u64)pages_per_block << PAGE_SHIFT) - 1)))
!(size & (((u64)pages_per_block << PAGE_SHIFT) - 1)))
min_block_size = (u64)pages_per_block << PAGE_SHIFT;

BUG_ON(min_block_size < mm->chunk_size);

r = drm_buddy_alloc_blocks(mm, fpfn,
lpfn,
size,
min_block_size,
&vres->blocks,
vres->flags);

if (unlikely(r == -ENOSPC) && pages_per_block == ~0ul &&
!(place->flags & TTM_PL_FLAG_CONTIGUOUS)) {
vres->flags &= ~DRM_BUDDY_CONTIGUOUS_ALLOCATION;
pages_per_block = max_t(u32, 2UL << (20UL - PAGE_SHIFT),
tbo->page_alignment);

continue;
}

if (unlikely(r))
goto error_free_blocks;

Expand Down

0 comments on commit e362b7c

Please sign in to comment.