Skip to content

Commit

Permalink
drm/amdgpu: save vm fault information for amdkfd
Browse files Browse the repository at this point in the history
amdgpu saves the VM fault related information for KFD usage and keeps the
copy until KFD reads it.

Signed-off-by: shaoyun liu <shaoyun.liu@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
  • Loading branch information
shaoyunl authored and Oded Gabbay committed Jul 12, 2018
1 parent 101fee6 commit b97dfa2
Show file tree
Hide file tree
Showing 8 changed files with 105 additions and 2 deletions.
3 changes: 3 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,9 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
struct dma_fence **ef);

/*
 * Copy the pending VM fault record saved for @kgd into @info, if there is
 * one, and mark it as consumed. Defined in amdgpu_amdkfd_gpuvm.c; that
 * definition always returns 0.
 */
int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
struct kfd_vm_fault_info *info);

void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo);

Expand Down
1 change: 1 addition & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
.invalidate_tlbs = invalidate_tlbs,
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
.submit_ib = amdgpu_amdkfd_submit_ib,
.get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info
};

struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
Expand Down
1 change: 1 addition & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
.invalidate_tlbs = invalidate_tlbs,
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
.submit_ib = amdgpu_amdkfd_submit_ib,
.get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info
};

struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
Expand Down
14 changes: 14 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
Original file line number Diff line number Diff line change
Expand Up @@ -1621,6 +1621,20 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
return ret;
}

/*
 * amdgpu_amdkfd_gpuvm_get_vm_fault_info - hand the saved VM fault record to KFD
 *
 * @kgd:  device handle; cast to the struct amdgpu_device that owns the record
 * @info: destination for the copy; written only when a record is pending
 *
 * When adev->gmc.vm_fault_info_updated reads 1, the record stored in
 * adev->gmc.vm_fault_info is copied to @info and the flag is reset to 0.
 * Otherwise @info is left untouched.
 *
 * Always returns 0, so the return value does not tell the caller whether
 * @info was filled in.
 */
int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
					  struct kfd_vm_fault_info *info)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) {
		*info = *adev->gmc.vm_fault_info;
		/*
		 * Complete the copy before clearing the flag: the GMC
		 * interrupt handlers only overwrite the record while the
		 * flag reads 0.
		 */
		mb();
		atomic_set(&adev->gmc.vm_fault_info_updated, 0);
	}
	return 0;
}

/* Evict a userptr BO by stopping the queues if necessary
*
* Runs in MMU notifier, may be in RECLAIM_FS context. This means it
Expand Down
2 changes: 2 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ struct amdgpu_gmc {
/* protects concurrent invalidation */
spinlock_t invalidate_lock;
bool translate_further;
struct kfd_vm_fault_info *vm_fault_info;
atomic_t vm_fault_info_updated;

const struct amdgpu_gmc_funcs *gmc_funcs;
};
Expand Down
33 changes: 32 additions & 1 deletion drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include "cik.h"
#include "gmc_v7_0.h"
#include "amdgpu_ucode.h"
#include "amdgpu_amdkfd.h"

#include "bif/bif_4_1_d.h"
#include "bif/bif_4_1_sh_mask.h"
Expand Down Expand Up @@ -1078,6 +1079,12 @@ static int gmc_v7_0_sw_init(void *handle)
adev->vm_manager.vram_base_offset = 0;
}

adev->gmc.vm_fault_info = kmalloc(sizeof(struct kfd_vm_fault_info),
GFP_KERNEL);
if (!adev->gmc.vm_fault_info)
return -ENOMEM;
atomic_set(&adev->gmc.vm_fault_info_updated, 0);

return 0;
}

Expand All @@ -1087,6 +1094,7 @@ static int gmc_v7_0_sw_fini(void *handle)

amdgpu_gem_force_release(adev);
amdgpu_vm_manager_fini(adev);
kfree(adev->gmc.vm_fault_info);
gmc_v7_0_gart_fini(adev);
amdgpu_bo_fini(adev);
release_firmware(adev->gmc.fw);
Expand Down Expand Up @@ -1276,7 +1284,7 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
u32 addr, status, mc_client;
u32 addr, status, mc_client, vmid;

addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR);
status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
Expand All @@ -1301,6 +1309,29 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
entry->pasid);
}

vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
VMID);
if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)
&& !atomic_read(&adev->gmc.vm_fault_info_updated)) {
struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info;
u32 protections = REG_GET_FIELD(status,
VM_CONTEXT1_PROTECTION_FAULT_STATUS,
PROTECTIONS);

info->vmid = vmid;
info->mc_id = REG_GET_FIELD(status,
VM_CONTEXT1_PROTECTION_FAULT_STATUS,
MEMORY_CLIENT_ID);
info->status = status;
info->page_addr = addr;
info->prot_valid = protections & 0x7 ? true : false;
info->prot_read = protections & 0x8 ? true : false;
info->prot_write = protections & 0x10 ? true : false;
info->prot_exec = protections & 0x20 ? true : false;
mb();
atomic_set(&adev->gmc.vm_fault_info_updated, 1);
}

return 0;
}

Expand Down
33 changes: 32 additions & 1 deletion drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "amdgpu.h"
#include "gmc_v8_0.h"
#include "amdgpu_ucode.h"
#include "amdgpu_amdkfd.h"

#include "gmc/gmc_8_1_d.h"
#include "gmc/gmc_8_1_sh_mask.h"
Expand Down Expand Up @@ -1182,6 +1183,12 @@ static int gmc_v8_0_sw_init(void *handle)
adev->vm_manager.vram_base_offset = 0;
}

adev->gmc.vm_fault_info = kmalloc(sizeof(struct kfd_vm_fault_info),
GFP_KERNEL);
if (!adev->gmc.vm_fault_info)
return -ENOMEM;
atomic_set(&adev->gmc.vm_fault_info_updated, 0);

return 0;
}

Expand All @@ -1191,6 +1198,7 @@ static int gmc_v8_0_sw_fini(void *handle)

amdgpu_gem_force_release(adev);
amdgpu_vm_manager_fini(adev);
kfree(adev->gmc.vm_fault_info);
gmc_v8_0_gart_fini(adev);
amdgpu_bo_fini(adev);
release_firmware(adev->gmc.fw);
Expand Down Expand Up @@ -1426,7 +1434,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
u32 addr, status, mc_client;
u32 addr, status, mc_client, vmid;

if (amdgpu_sriov_vf(adev)) {
dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
Expand Down Expand Up @@ -1463,6 +1471,29 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
entry->pasid);
}

vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
VMID);
if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)
&& !atomic_read(&adev->gmc.vm_fault_info_updated)) {
struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info;
u32 protections = REG_GET_FIELD(status,
VM_CONTEXT1_PROTECTION_FAULT_STATUS,
PROTECTIONS);

info->vmid = vmid;
info->mc_id = REG_GET_FIELD(status,
VM_CONTEXT1_PROTECTION_FAULT_STATUS,
MEMORY_CLIENT_ID);
info->status = status;
info->page_addr = addr;
info->prot_valid = protections & 0x7 ? true : false;
info->prot_read = protections & 0x8 ? true : false;
info->prot_write = protections & 0x10 ? true : false;
info->prot_exec = protections & 0x20 ? true : false;
mb();
atomic_set(&adev->gmc.vm_fault_info_updated, 1);
}

return 0;
}

Expand Down
20 changes: 20 additions & 0 deletions drivers/gpu/drm/amd/include/kgd_kfd_interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,17 @@ enum kfd_preempt_type {
KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
};

/*
 * Snapshot of one VM protection fault, filled in by the GMC v7/v8 interrupt
 * handlers and copied out to KFD through kfd2kgd_calls.get_vm_fault_info.
 */
struct kfd_vm_fault_info {
/* Value read from VM_CONTEXT1_PROTECTION_FAULT_ADDR */
uint64_t page_addr;
/* VMID field of VM_CONTEXT1_PROTECTION_FAULT_STATUS */
uint32_t vmid;
/* MEMORY_CLIENT_ID field of VM_CONTEXT1_PROTECTION_FAULT_STATUS */
uint32_t mc_id;
/* Raw VM_CONTEXT1_PROTECTION_FAULT_STATUS register value */
uint32_t status;
/* True when any of PROTECTIONS bits 0x7 is set */
bool prot_valid;
/* True when PROTECTIONS bit 0x8 is set */
bool prot_read;
/* True when PROTECTIONS bit 0x10 is set */
bool prot_write;
/* True when PROTECTIONS bit 0x20 is set */
bool prot_exec;
};

struct kfd_cu_info {
uint32_t num_shader_engines;
uint32_t num_shader_arrays_per_engine;
Expand Down Expand Up @@ -259,6 +270,12 @@ struct tile_config {
* IB to the corresponding ring (ring type). The IB is executed with the
* specified VMID in a user mode context.
*
* @get_vm_fault_info: Return information about a recent VM fault on
* GFXv7 and v8. If multiple VM faults occurred since the last call of
* this function, it will return information about the first of those
* faults. On GFXv9 VM fault information is fully contained in the IH
* packet and this function is not needed.
*
* This structure contains function pointers to services that the kgd driver
* provides to amdkfd driver.
*
Expand Down Expand Up @@ -374,6 +391,9 @@ struct kfd2kgd_calls {
int (*submit_ib)(struct kgd_dev *kgd, enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr,
uint32_t *ib_cmd, uint32_t ib_len);

int (*get_vm_fault_info)(struct kgd_dev *kgd,
struct kfd_vm_fault_info *info);
};

/**
Expand Down

0 comments on commit b97dfa2

Please sign in to comment.