Skip to content

Commit

Permalink
drm/amdgpu: fix TLB flushing during eviction
Browse files Browse the repository at this point in the history
Testing the valid bit is not enough to figure out if we
need to invalidate the TLB or not.

During eviction it is quite likely that we move a BO from VRAM to GTT and
update the page tables immediately to the new GTT address.

Rework the whole function to get all the necessary parameters directly as
values.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Philip Yang <Philip.Yang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
  • Loading branch information
Christian König authored and Alex Deucher committed Apr 5, 2022
1 parent 50e6cb3 commit 30671b4
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 48 deletions.
63 changes: 33 additions & 30 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
Original file line number Diff line number Diff line change
Expand Up @@ -793,36 +793,34 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
}

/**
* amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
* amdgpu_vm_update_range - update a range in the vm page table
*
* @adev: amdgpu_device pointer of the VM
* @bo_adev: amdgpu_device pointer of the mapped BO
* @vm: requested vm
* @adev: amdgpu_device pointer to use for commands
* @vm: the VM to update the range
* @immediate: immediate submission in a page fault
* @unlocked: unlocked invalidation during MM callback
* @flush_tlb: trigger tlb invalidation after update completed
* @resv: fences we need to sync to
* @start: start of mapped range
* @last: last mapped entry
* @flags: flags for the entries
* @offset: offset into nodes and pages_addr
* @vram_base: base for vram mappings
* @res: ttm_resource to map
* @pages_addr: DMA addresses to use for mapping
* @fence: optional resulting fence
*
* Fill in the page table entries between @start and @last.
*
* Returns:
* 0 for success, -EINVAL for failure.
* 0 for success, negative error code for failure.
*/
int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
struct amdgpu_device *bo_adev,
struct amdgpu_vm *vm, bool immediate,
bool unlocked, struct dma_resv *resv,
uint64_t start, uint64_t last,
uint64_t flags, uint64_t offset,
struct ttm_resource *res,
dma_addr_t *pages_addr,
struct dma_fence **fence)
int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
bool immediate, bool unlocked, bool flush_tlb,
struct dma_resv *resv, uint64_t start, uint64_t last,
uint64_t flags, uint64_t offset, uint64_t vram_base,
struct ttm_resource *res, dma_addr_t *pages_addr,
struct dma_fence **fence)
{
struct amdgpu_vm_update_params params;
struct amdgpu_vm_tlb_seq_cb *tlb_cb;
Expand Down Expand Up @@ -910,8 +908,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
}

} else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
addr = bo_adev->vm_manager.vram_base_offset +
cursor.start;
addr = vram_base + cursor.start;
} else {
addr = 0;
}
Expand All @@ -927,7 +924,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,

r = vm->update_funcs->commit(&params, fence);

if (!(flags & AMDGPU_PTE_VALID) || params.table_freed) {
if (flush_tlb || params.table_freed) {
tlb_cb->vm = vm;
if (!fence || !*fence ||
dma_fence_add_callback(*fence, &tlb_cb->cb,
Expand Down Expand Up @@ -1010,9 +1007,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
dma_addr_t *pages_addr = NULL;
struct ttm_resource *mem;
struct dma_fence **last_update;
bool flush_tlb = clear;
struct dma_resv *resv;
uint64_t vram_base;
uint64_t flags;
struct amdgpu_device *bo_adev = adev;
int r;

if (clear || !bo) {
Expand All @@ -1037,14 +1035,18 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
}

if (bo) {
struct amdgpu_device *bo_adev;

flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);

if (amdgpu_bo_encrypted(bo))
flags |= AMDGPU_PTE_TMZ;

bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
vram_base = bo_adev->vm_manager.vram_base_offset;
} else {
flags = 0x0;
vram_base = 0;
}

if (clear || (bo && bo->tbo.base.resv ==
Expand All @@ -1054,7 +1056,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
last_update = &bo_va->last_pt_update;

if (!clear && bo_va->base.moved) {
bo_va->base.moved = false;
flush_tlb = true;
list_splice_init(&bo_va->valids, &bo_va->invalids);

} else if (bo_va->cleared != clear) {
Expand All @@ -1077,11 +1079,11 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,

trace_amdgpu_vm_bo_update(mapping);

r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false,
resv, mapping->start,
mapping->last, update_flags,
mapping->offset, mem,
pages_addr, last_update);
r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb,
resv, mapping->start, mapping->last,
update_flags, mapping->offset,
vram_base, mem, pages_addr,
last_update);
if (r)
return r;
}
Expand All @@ -1104,6 +1106,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,

list_splice_init(&bo_va->invalids, &bo_va->valids);
bo_va->cleared = clear;
bo_va->base.moved = false;

if (trace_amdgpu_vm_bo_mapping_enabled()) {
list_for_each_entry(mapping, &bo_va->valids, list)
Expand Down Expand Up @@ -1272,10 +1275,10 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
mapping->start < AMDGPU_GMC_HOLE_START)
init_pte_value = AMDGPU_PTE_DEFAULT_ATC;

r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, false,
resv, mapping->start,
mapping->last, init_pte_value,
0, NULL, NULL, &f);
r = amdgpu_vm_update_range(adev, vm, false, false, true, resv,
mapping->start, mapping->last,
init_pte_value, 0, 0, NULL, NULL,
&f);
amdgpu_vm_free_mapping(adev, vm, mapping, f);
if (r) {
dma_fence_put(f);
Expand Down Expand Up @@ -2519,8 +2522,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
goto error_unlock;
}

r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false, NULL, addr,
addr, flags, value, NULL, NULL, NULL);
r = amdgpu_vm_update_range(adev, vm, true, false, false, NULL, addr,
addr, flags, value, 0, NULL, NULL, NULL);
if (r)
goto error_unlock;

Expand Down
15 changes: 6 additions & 9 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
Original file line number Diff line number Diff line change
Expand Up @@ -402,15 +402,12 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
struct amdgpu_vm *vm, struct amdgpu_bo *bo);
int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
struct amdgpu_device *bo_adev,
struct amdgpu_vm *vm, bool immediate,
bool unlocked, struct dma_resv *resv,
uint64_t start, uint64_t last,
uint64_t flags, uint64_t offset,
struct ttm_resource *res,
dma_addr_t *pages_addr,
struct dma_fence **fence);
int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
bool immediate, bool unlocked, bool flush_tlb,
struct dma_resv *resv, uint64_t start, uint64_t last,
uint64_t flags, uint64_t offset, uint64_t vram_base,
struct ttm_resource *res, dma_addr_t *pages_addr,
struct dma_fence **fence);
int amdgpu_vm_bo_update(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
bool clear);
Expand Down
18 changes: 9 additions & 9 deletions drivers/gpu/drm/amd/amdkfd/kfd_svm.c
Original file line number Diff line number Diff line change
Expand Up @@ -1188,9 +1188,9 @@ svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,

pr_debug("[0x%llx 0x%llx]\n", start, last);

return amdgpu_vm_bo_update_mapping(adev, adev, vm, false, true, NULL,
start, last, init_pte_value, 0,
NULL, NULL, fence);
return amdgpu_vm_update_range(adev, vm, false, true, true, NULL, start,
last, init_pte_value, 0, 0, NULL, NULL,
fence);
}

static int
Expand Down Expand Up @@ -1277,12 +1277,12 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
(last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
pte_flags);

r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false,
NULL, last_start,
prange->start + i, pte_flags,
last_start - prange->start,
NULL, dma_addr,
&vm->last_update);
r = amdgpu_vm_update_range(adev, vm, false, false, false, NULL,
last_start, prange->start + i,
pte_flags,
last_start - prange->start,
bo_adev->vm_manager.vram_base_offset,
NULL, dma_addr, &vm->last_update);

for (j = last_start - prange->start; j <= i; j++)
dma_addr[j] |= last_domain;
Expand Down

0 comments on commit 30671b4

Please sign in to comment.