drm/ttm: flip the switch for driver allocated resources v2
Instead of having both the driver and TTM allocate memory, finalize embedding
the ttm_resource object as base into the driver backends.

v2: fix typo in vmwgfx grid mgr and double init in amdgpu_vram_mgr.c

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210602100914.46246-10-christian.koenig@amd.com
Christian König committed Jun 4, 2021
1 parent d3bcb4b commit cb1c814
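
The shape every backend converges on after this patch, as a minimal sketch
(the my_* names are illustrative only, and the sketch assumes the
ttm_resource_init() helper introduced earlier in this series):

	#include <linux/kernel.h>
	#include <linux/slab.h>
	#include <drm/ttm/ttm_placement.h>
	#include <drm/ttm/ttm_resource.h>

	/* The driver wraps the TTM object instead of pointing at it. */
	struct my_node {
		struct ttm_resource base;	/* embedded; res->mm_node is gone */
		unsigned long driver_data;	/* driver-private payload */
	};

	static inline struct my_node *to_my_node(struct ttm_resource *res)
	{
		return container_of(res, struct my_node, base);
	}

	static int my_mgr_alloc(struct ttm_resource_manager *man,
				struct ttm_buffer_object *tbo,
				const struct ttm_place *place,
				struct ttm_resource **res)
	{
		struct my_node *node = kzalloc(sizeof(*node), GFP_KERNEL);

		if (!node)
			return -ENOMEM;

		ttm_resource_init(tbo, place, &node->base);
		/* ... reserve driver-specific address space here ... */

		*res = &node->base;	/* hand the embedded base back to TTM */
		return 0;
	}

	static void my_mgr_free(struct ttm_resource_manager *man,
				struct ttm_resource *res)
	{
		/* one allocation, one free: the resource dies with its node */
		kfree(to_my_node(res));
	}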
Showing 16 changed files with 140 additions and 189 deletions.
44 changes: 19 additions & 25 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -40,8 +40,7 @@ to_gtt_mgr(struct ttm_resource_manager *man)
 static inline struct amdgpu_gtt_node *
 to_amdgpu_gtt_node(struct ttm_resource *res)
 {
-	return container_of(res->mm_node, struct amdgpu_gtt_node,
-			    base.mm_nodes[0]);
+	return container_of(res, struct amdgpu_gtt_node, base.base);
 }
 
 /**
@@ -102,13 +101,13 @@ const struct attribute_group amdgpu_gtt_mgr_attr_group = {
 /**
  * amdgpu_gtt_mgr_has_gart_addr - Check if mem has address space
  *
- * @mem: the mem object to check
+ * @res: the mem object to check
  *
  * Check if a mem object has already address space allocated.
  */
-bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *mem)
+bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *res)
 {
-	struct amdgpu_gtt_node *node = to_amdgpu_gtt_node(mem);
+	struct amdgpu_gtt_node *node = to_amdgpu_gtt_node(res);
 
 	return drm_mm_node_allocated(&node->base.mm_nodes[0]);
 }
@@ -126,19 +125,20 @@ bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *mem)
 static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man,
 			      struct ttm_buffer_object *tbo,
 			      const struct ttm_place *place,
-			      struct ttm_resource *mem)
+			      struct ttm_resource **res)
 {
 	struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
+	uint32_t num_pages = PFN_UP(tbo->base.size);
 	struct amdgpu_gtt_node *node;
 	int r;
 
 	spin_lock(&mgr->lock);
-	if ((tbo->resource == mem || tbo->resource->mem_type != TTM_PL_TT) &&
-	    atomic64_read(&mgr->available) < mem->num_pages) {
+	if (tbo->resource && tbo->resource->mem_type != TTM_PL_TT &&
+	    atomic64_read(&mgr->available) < num_pages) {
 		spin_unlock(&mgr->lock);
 		return -ENOSPC;
 	}
-	atomic64_sub(mem->num_pages, &mgr->available);
+	atomic64_sub(num_pages, &mgr->available);
 	spin_unlock(&mgr->lock);
 
 	node = kzalloc(struct_size(node, base.mm_nodes, 1), GFP_KERNEL);
@@ -154,29 +154,28 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man,
 		spin_lock(&mgr->lock);
 		r = drm_mm_insert_node_in_range(&mgr->mm,
 						&node->base.mm_nodes[0],
-						mem->num_pages,
-						tbo->page_alignment, 0,
-						place->fpfn, place->lpfn,
+						num_pages, tbo->page_alignment,
+						0, place->fpfn, place->lpfn,
 						DRM_MM_INSERT_BEST);
 		spin_unlock(&mgr->lock);
 		if (unlikely(r))
 			goto err_free;
 
-		mem->start = node->base.mm_nodes[0].start;
+		node->base.base.start = node->base.mm_nodes[0].start;
 	} else {
 		node->base.mm_nodes[0].start = 0;
-		node->base.mm_nodes[0].size = mem->num_pages;
-		mem->start = AMDGPU_BO_INVALID_OFFSET;
+		node->base.mm_nodes[0].size = node->base.base.num_pages;
+		node->base.base.start = AMDGPU_BO_INVALID_OFFSET;
 	}
 
-	mem->mm_node = &node->base.mm_nodes[0];
+	*res = &node->base.base;
 	return 0;
 
 err_free:
 	kfree(node);
 
 err_out:
-	atomic64_add(mem->num_pages, &mgr->available);
+	atomic64_add(num_pages, &mgr->available);
 
 	return r;
 }
@@ -190,21 +189,16 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man,
  * Free the allocated GTT again.
  */
 static void amdgpu_gtt_mgr_del(struct ttm_resource_manager *man,
-			       struct ttm_resource *mem)
+			       struct ttm_resource *res)
 {
+	struct amdgpu_gtt_node *node = to_amdgpu_gtt_node(res);
 	struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
-	struct amdgpu_gtt_node *node;
-
-	if (!mem->mm_node)
-		return;
-
-	node = to_amdgpu_gtt_node(mem);
 
 	spin_lock(&mgr->lock);
 	if (drm_mm_node_allocated(&node->base.mm_nodes[0]))
 		drm_mm_remove_node(&node->base.mm_nodes[0]);
 	spin_unlock(&mgr->lock);
-	atomic64_add(mem->num_pages, &mgr->available);
+	atomic64_add(res->num_pages, &mgr->available);
 
 	kfree(node);
 }
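
For reference, the container_of() chain behind to_amdgpu_gtt_node() above
relies on this nesting, paraphrased from ttm_range_manager.h and the amdgpu
GTT manager as they stand at this point in the series:

	struct ttm_range_mgr_node {
		struct ttm_resource base;	/* the "base.base" seen above */
		struct drm_mm_node mm_nodes[];	/* flexible array, hence struct_size() */
	};

	struct amdgpu_gtt_node {
		struct ttm_buffer_object *tbo;
		struct ttm_range_mgr_node base;	/* must come last: mm_nodes[] is open-ended */
	};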
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1296,7 +1296,7 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
 	if (bo->base.resv == &bo->base._resv)
 		amdgpu_amdkfd_remove_fence_on_pt_pd_bos(abo);
 
-	if (bo->resource->mem_type != TTM_PL_VRAM || !bo->resource->mm_node ||
+	if (bo->resource->mem_type != TTM_PL_VRAM ||
 	    !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE))
 		return;
 
5 changes: 3 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -28,6 +28,7 @@
 
 #include <drm/drm_mm.h>
 #include <drm/ttm/ttm_resource.h>
+#include <drm/ttm/ttm_range_manager.h>
 
 /* state back for walking over vram_mgr and gtt_mgr allocations */
 struct amdgpu_res_cursor {
@@ -53,7 +54,7 @@ static inline void amdgpu_res_first(struct ttm_resource *res,
 {
 	struct drm_mm_node *node;
 
-	if (!res || !res->mm_node) {
+	if (!res) {
 		cur->start = start;
 		cur->size = size;
 		cur->remaining = size;
@@ -63,7 +64,7 @@ static inline void amdgpu_res_first(struct ttm_resource *res,
 
 	BUG_ON(start + size > res->num_pages << PAGE_SHIFT);
 
-	node = res->mm_node;
+	node = to_ttm_range_mgr_node(res)->mm_nodes;
 	while (start >= node->size << PAGE_SHIFT)
 		start -= node++->size << PAGE_SHIFT;
 
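
The pointer walk in amdgpu_res_first() skips whole drm_mm nodes until the
requested start offset falls inside one. A standalone illustration with
made-up node sizes, counted in pages rather than bytes so the PAGE_SHIFT
scaling can be dropped:

	#include <assert.h>

	struct mm_node { unsigned long size; };

	int main(void)
	{
		struct mm_node nodes[] = { { 4 }, { 2 }, { 8 } };
		struct mm_node *node = nodes;
		unsigned long start = 6;	/* page offset into the resource */

		/* same loop shape as in amdgpu_res_first() */
		while (start >= node->size)
			start -= node++->size;

		/* 6 - 4 - 2 = 0: the cursor lands at offset 0 of the third node */
		assert(node == &nodes[2] && start == 0);
		return 0;
	}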
60 changes: 27 additions & 33 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -219,19 +219,20 @@ static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev,
 u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-	struct ttm_resource *mem = bo->tbo.resource;
-	struct drm_mm_node *nodes = mem->mm_node;
-	unsigned pages = mem->num_pages;
+	struct ttm_resource *res = bo->tbo.resource;
+	unsigned pages = res->num_pages;
+	struct drm_mm_node *mm;
 	u64 usage;
 
 	if (amdgpu_gmc_vram_full_visible(&adev->gmc))
 		return amdgpu_bo_size(bo);
 
-	if (mem->start >= adev->gmc.visible_vram_size >> PAGE_SHIFT)
+	if (res->start >= adev->gmc.visible_vram_size >> PAGE_SHIFT)
 		return 0;
 
-	for (usage = 0; nodes && pages; pages -= nodes->size, nodes++)
-		usage += amdgpu_vram_mgr_vis_size(adev, nodes);
+	mm = &container_of(res, struct ttm_range_mgr_node, base)->mm_nodes[0];
+	for (usage = 0; pages; pages -= mm->size, mm++)
+		usage += amdgpu_vram_mgr_vis_size(adev, mm);
 
 	return usage;
 }
@@ -367,7 +368,7 @@ static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
 static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 			       struct ttm_buffer_object *tbo,
 			       const struct ttm_place *place,
-			       struct ttm_resource *mem)
+			       struct ttm_resource **res)
 {
 	unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
 	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
@@ -388,7 +389,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 	max_bytes -= AMDGPU_VM_RESERVED_VRAM;
 
 	/* bail out quickly if there's likely not enough VRAM for this BO */
-	mem_bytes = (u64)mem->num_pages << PAGE_SHIFT;
+	mem_bytes = tbo->base.size;
 	if (atomic64_add_return(mem_bytes, &mgr->usage) > max_bytes) {
 		r = -ENOSPC;
 		goto error_sub;
@@ -406,7 +407,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 #endif
 		pages_per_node = max_t(uint32_t, pages_per_node,
 				       tbo->page_alignment);
-		num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
+		num_nodes = DIV_ROUND_UP(PFN_UP(mem_bytes), pages_per_node);
 	}
 
 	node = kvmalloc(struct_size(node, mm_nodes, num_nodes),
@@ -422,8 +423,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 	if (place->flags & TTM_PL_FLAG_TOPDOWN)
 		mode = DRM_MM_INSERT_HIGH;
 
-	mem->start = 0;
-	pages_left = mem->num_pages;
+	pages_left = node->base.num_pages;
 
 	/* Limit maximum size to 2GB due to SG table limitations */
 	pages = min(pages_left, 2UL << (30 - PAGE_SHIFT));
@@ -451,7 +451,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 		}
 
 		vis_usage += amdgpu_vram_mgr_vis_size(adev, &node->mm_nodes[i]);
-		amdgpu_vram_mgr_virt_start(mem, &node->mm_nodes[i]);
+		amdgpu_vram_mgr_virt_start(&node->base, &node->mm_nodes[i]);
 		pages_left -= pages;
 		++i;
 
@@ -461,10 +461,10 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 	spin_unlock(&mgr->lock);
 
 	if (i == 1)
-		mem->placement |= TTM_PL_FLAG_CONTIGUOUS;
+		node->base.placement |= TTM_PL_FLAG_CONTIGUOUS;
 
 	atomic64_add(vis_usage, &mgr->vis_usage);
-	mem->mm_node = &node->mm_nodes[0];
+	*res = &node->base;
 	return 0;
 
 error_free:
@@ -487,28 +487,22 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
  * Free the allocated VRAM again.
  */
 static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
-				struct ttm_resource *mem)
+				struct ttm_resource *res)
 {
+	struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res);
 	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
 	struct amdgpu_device *adev = to_amdgpu_device(mgr);
-	struct ttm_range_mgr_node *node;
 	uint64_t usage = 0, vis_usage = 0;
-	unsigned pages = mem->num_pages;
-	struct drm_mm_node *nodes;
-
-	if (!mem->mm_node)
-		return;
-
-	node = to_ttm_range_mgr_node(mem);
-	nodes = &node->mm_nodes[0];
+	unsigned i, pages;
 
 	spin_lock(&mgr->lock);
-	while (pages) {
-		pages -= nodes->size;
-		drm_mm_remove_node(nodes);
-		usage += nodes->size << PAGE_SHIFT;
-		vis_usage += amdgpu_vram_mgr_vis_size(adev, nodes);
-		++nodes;
+	for (i = 0, pages = res->num_pages; pages;
+	     pages -= node->mm_nodes[i].size, ++i) {
+		struct drm_mm_node *mm = &node->mm_nodes[i];
+
+		drm_mm_remove_node(mm);
+		usage += mm->size << PAGE_SHIFT;
+		vis_usage += amdgpu_vram_mgr_vis_size(adev, mm);
 	}
 	amdgpu_vram_mgr_do_reserve(man);
 	spin_unlock(&mgr->lock);
@@ -533,7 +527,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
 * Allocate and fill a sg table from a VRAM allocation.
 */
 int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
-			      struct ttm_resource *mem,
+			      struct ttm_resource *res,
 			      u64 offset, u64 length,
 			      struct device *dev,
 			      enum dma_data_direction dir,
@@ -549,7 +543,7 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
 		return -ENOMEM;
 
 	/* Determine the number of DRM_MM nodes to export */
-	amdgpu_res_first(mem, offset, length, &cursor);
+	amdgpu_res_first(res, offset, length, &cursor);
 	while (cursor.remaining) {
 		num_entries++;
 		amdgpu_res_next(&cursor, cursor.size);
@@ -569,7 +563,7 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
 	 * and the number of bytes from it. Access the following
 	 * DRM_MM node(s) if more buffer needs to exported
 	 */
-	amdgpu_res_first(mem, offset, length, &cursor);
+	amdgpu_res_first(res, offset, length, &cursor);
 	for_each_sgtable_sg((*sgt), sg, i) {
 		phys_addr_t phys = cursor.start + adev->gmc.aper_base;
 		size_t size = cursor.size;
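
A note on the "Limit maximum size to 2GB" clamp visible in
amdgpu_vram_mgr_new(): assuming 4 KiB pages (PAGE_SHIFT == 12; other
configurations differ), 2UL << (30 - PAGE_SHIFT) is 2UL << 18 = 524288 pages,
i.e. exactly 2 GiB per drm_mm node, which keeps each node describable by the
scatterlists built in amdgpu_vram_mgr_alloc_sgt(). A quick userspace check of
that arithmetic:

	#include <assert.h>

	int main(void)
	{
		const unsigned int page_shift = 12;	/* assumed 4 KiB pages */
		unsigned long pages = 2UL << (30 - page_shift);

		assert(pages == 524288);	/* 524288 * 4096 bytes == 2 GiB */
		return 0;
	}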
3 changes: 2 additions & 1 deletion drivers/gpu/drm/drm_gem_vram_helper.c
@@ -250,7 +250,8 @@ EXPORT_SYMBOL(drm_gem_vram_put);
 static u64 drm_gem_vram_pg_offset(struct drm_gem_vram_object *gbo)
 {
 	/* Keep TTM behavior for now, remove when drivers are audited */
-	if (WARN_ON_ONCE(!gbo->bo.resource->mm_node))
+	if (WARN_ON_ONCE(!gbo->bo.resource ||
+			 gbo->bo.resource->mem_type == TTM_PL_SYSTEM))
 		return 0;
 
 	return gbo->bo.resource->start;
8 changes: 2 additions & 6 deletions drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -918,12 +918,8 @@ static void nouveau_bo_move_ntfy(struct ttm_buffer_object *bo,
 		}
 	}
 
-	if (new_reg) {
-		if (new_reg->mm_node)
-			nvbo->offset = (new_reg->start << PAGE_SHIFT);
-		else
-			nvbo->offset = 0;
-	}
+	if (new_reg)
+		nvbo->offset = (new_reg->start << PAGE_SHIFT);
 
 }
 
11 changes: 5 additions & 6 deletions drivers/gpu/drm/nouveau/nouveau_mem.c
@@ -178,25 +178,24 @@ void
 nouveau_mem_del(struct ttm_resource *reg)
 {
 	struct nouveau_mem *mem = nouveau_mem(reg);
-	if (!mem)
-		return;
+
 	nouveau_mem_fini(mem);
-	kfree(reg->mm_node);
-	reg->mm_node = NULL;
+	kfree(mem);
 }
 
 int
 nouveau_mem_new(struct nouveau_cli *cli, u8 kind, u8 comp,
-		struct ttm_resource *reg)
+		struct ttm_resource **res)
 {
 	struct nouveau_mem *mem;
 
 	if (!(mem = kzalloc(sizeof(*mem), GFP_KERNEL)))
 		return -ENOMEM;
+
 	mem->cli = cli;
 	mem->kind = kind;
 	mem->comp = comp;
 
-	reg->mm_node = mem;
+	*res = &mem->base;
 	return 0;
 }
14 changes: 7 additions & 7 deletions drivers/gpu/drm/nouveau/nouveau_mem.h
@@ -6,12 +6,6 @@ struct ttm_tt;
 #include <nvif/mem.h>
 #include <nvif/vmm.h>
 
-static inline struct nouveau_mem *
-nouveau_mem(struct ttm_resource *reg)
-{
-	return reg->mm_node;
-}
-
 struct nouveau_mem {
 	struct ttm_resource base;
 	struct nouveau_cli *cli;
@@ -21,8 +15,14 @@ struct nouveau_mem {
 	struct nvif_vma vma[2];
 };
 
+static inline struct nouveau_mem *
+nouveau_mem(struct ttm_resource *reg)
+{
+	return container_of(reg, struct nouveau_mem, base);
+}
+
 int nouveau_mem_new(struct nouveau_cli *, u8 kind, u8 comp,
-		    struct ttm_resource *);
+		    struct ttm_resource **);
 void nouveau_mem_del(struct ttm_resource *);
 int nouveau_mem_vram(struct ttm_resource *, bool contig, u8 page);
 int nouveau_mem_host(struct ttm_resource *, struct ttm_tt *);
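
Moving nouveau_mem() below the struct definition is not cosmetic:
container_of() boils down to an offsetof(), and offsetof() needs the complete
type. Roughly, ignoring the kernel's extra type checking:

	#include <stddef.h>

	/* simplified expansion, for illustration only */
	#define my_container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

offsetof(struct nouveau_mem, base) is only valid once struct nouveau_mem is
fully defined. With base as the first member the offset happens to be 0, but
container_of() stays correct even if the layout changes later.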
(Diff truncated: the remaining changed files are not shown here.)
