Skip to content

Commit

Permalink
Merge tag 'drm-amdkfd-next-2018-03-11' of git://people.freedesktop.or…
Browse files Browse the repository at this point in the history
…g/~gabbayo/linux into drm-next

Major points for this pull request:
- Add dGPU support for amdkfd initialization code and queue handling. Support
  is not yet complete because the GPUVM part (still under debate) is missing.
- Enable PCIe atomics for dGPU if present
- Various adjustments to the amdgpu<-->amdkfd interface for dGPUs
- Refactor IOMMUv2 code to allow loading amdkfd without IOMMUv2 in the system
- Add HSA process eviction code in case of system memory pressure
- Various fixes and small changes

* tag 'drm-amdkfd-next-2018-03-11' of git://people.freedesktop.org/~gabbayo/linux: (24 commits)
  uapi: Fix type used in ioctl parameter structures
  drm/amdkfd: Implement KFD process eviction/restore
  drm/amdkfd: Add GPUVM virtual address space to PDD
  drm/amdkfd: Remove unaligned memory access
  drm/amdkfd: Centralize IOMMUv2 code and make it conditional
  drm/amdgpu: Add submit IB function for KFD
  drm/amdgpu: Add GPUVM memory management functions for KFD
  drm/amdgpu: add amdgpu_sync_clone
  drm/amdgpu: Update kgd2kfd_shared_resources for dGPU support
  drm/amdgpu: Add KFD eviction fence
  drm/amdgpu: Remove unused kfd2kgd interface
  drm/amdgpu: Fix wrong mask in get_atc_vmid_pasid_mapping_pasid
  drm/amdgpu: Fix header file dependencies
  drm/amdgpu: Replace kgd_mem with amdgpu_bo for kernel pinned gtt mem
  drm/amdgpu: remove useless BUG_ONs
  drm/amdgpu: Enable KFD initialization on dGPUs
  drm/amdkfd: Add dGPU device IDs and device info
  drm/amdkfd: Add dGPU support to kernel_queue_init
  drm/amdkfd: Add dGPU support to the MQD manager
  drm/amdkfd: Add dGPU support to the device queue manager
  ...
  • Loading branch information
Dave Airlie committed Mar 14, 2018
2 parents 0b8eeac + a110244 commit 6fa7324
Show file tree
Hide file tree
Showing 41 changed files with 3,757 additions and 362 deletions.
2 changes: 2 additions & 0 deletions MAINTAINERS
Original file line number Diff line number Diff line change
Expand Up @@ -766,6 +766,8 @@ F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
F: drivers/gpu/drm/amd/amdkfd/
F: drivers/gpu/drm/amd/include/cik_structs.h
F: drivers/gpu/drm/amd/include/kgd_kfd_interface.h
Expand Down
1 change: 1 addition & 0 deletions drivers/dma-buf/dma-fence.c
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ void dma_fence_release(struct kref *kref)

trace_dma_fence_destroy(fence);

/* Failed to signal before release, could be a refcounting issue */
WARN_ON(!list_empty(&fence->cb_list));

if (fence->ops->release)
Expand Down
2 changes: 2 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,8 @@ amdgpu-y += \
# add amdkfd interfaces
amdgpu-y += \
amdgpu_amdkfd.o \
amdgpu_amdkfd_fence.o \
amdgpu_amdkfd_gpuvm.o \
amdgpu_amdkfd_gfx_v8.o

# add cgs
Expand Down
132 changes: 101 additions & 31 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
const struct kgd2kfd_calls *kgd2kfd;
bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);

static const unsigned int compute_vmid_bitmap = 0xFF00;

int amdgpu_amdkfd_init(void)
{
int ret;
Expand All @@ -56,6 +58,7 @@ int amdgpu_amdkfd_init(void)
#else
ret = -ENOENT;
#endif
amdgpu_amdkfd_gpuvm_init_mem_limits();

return ret;
}
Expand All @@ -78,10 +81,15 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_KAVERI:
case CHIP_HAWAII:
kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions();
break;
#endif
case CHIP_CARRIZO:
case CHIP_TONGA:
case CHIP_FIJI:
case CHIP_POLARIS10:
case CHIP_POLARIS11:
kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
break;
default:
Expand Down Expand Up @@ -132,9 +140,13 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
int last_valid_bit;
if (adev->kfd) {
struct kgd2kfd_shared_resources gpu_resources = {
.compute_vmid_bitmap = 0xFF00,
.compute_vmid_bitmap = compute_vmid_bitmap,
.num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe
.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe,
.gpuvm_size = min(adev->vm_manager.max_pfn
<< AMDGPU_GPU_PAGE_SHIFT,
AMDGPU_VA_HOLE_START),
.drm_render_minor = adev->ddev->render->index
};

/* this is going to have a few of the MSBs set that we need to
Expand Down Expand Up @@ -204,74 +216,67 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
void **cpu_ptr)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
struct kgd_mem **mem = (struct kgd_mem **) mem_obj;
struct amdgpu_bo *bo = NULL;
int r;

BUG_ON(kgd == NULL);
BUG_ON(gpu_addr == NULL);
BUG_ON(cpu_ptr == NULL);

*mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL);
if ((*mem) == NULL)
return -ENOMEM;
uint64_t gpu_addr_tmp = 0;
void *cpu_ptr_tmp = NULL;

r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT,
AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &(*mem)->bo);
AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &bo);
if (r) {
dev_err(adev->dev,
"failed to allocate BO for amdkfd (%d)\n", r);
return r;
}

/* map the buffer */
r = amdgpu_bo_reserve((*mem)->bo, true);
r = amdgpu_bo_reserve(bo, true);
if (r) {
dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
goto allocate_mem_reserve_bo_failed;
}

r = amdgpu_bo_pin((*mem)->bo, AMDGPU_GEM_DOMAIN_GTT,
&(*mem)->gpu_addr);
r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT,
&gpu_addr_tmp);
if (r) {
dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r);
goto allocate_mem_pin_bo_failed;
}
*gpu_addr = (*mem)->gpu_addr;

r = amdgpu_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr);
r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp);
if (r) {
dev_err(adev->dev,
"(%d) failed to map bo to kernel for amdkfd\n", r);
goto allocate_mem_kmap_bo_failed;
}
*cpu_ptr = (*mem)->cpu_ptr;

amdgpu_bo_unreserve((*mem)->bo);
*mem_obj = bo;
*gpu_addr = gpu_addr_tmp;
*cpu_ptr = cpu_ptr_tmp;

amdgpu_bo_unreserve(bo);

return 0;

allocate_mem_kmap_bo_failed:
amdgpu_bo_unpin((*mem)->bo);
amdgpu_bo_unpin(bo);
allocate_mem_pin_bo_failed:
amdgpu_bo_unreserve((*mem)->bo);
amdgpu_bo_unreserve(bo);
allocate_mem_reserve_bo_failed:
amdgpu_bo_unref(&(*mem)->bo);
amdgpu_bo_unref(&bo);

return r;
}

void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
{
struct kgd_mem *mem = (struct kgd_mem *) mem_obj;
struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj;

BUG_ON(mem == NULL);

amdgpu_bo_reserve(mem->bo, true);
amdgpu_bo_kunmap(mem->bo);
amdgpu_bo_unpin(mem->bo);
amdgpu_bo_unreserve(mem->bo);
amdgpu_bo_unref(&(mem->bo));
kfree(mem);
amdgpu_bo_reserve(bo, true);
amdgpu_bo_kunmap(bo);
amdgpu_bo_unpin(bo);
amdgpu_bo_unreserve(bo);
amdgpu_bo_unref(&(bo));
}

void get_local_mem_info(struct kgd_dev *kgd,
Expand Down Expand Up @@ -361,3 +366,68 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd)

return amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
}

/*
 * amdgpu_amdkfd_submit_ib - schedule a single indirect buffer for KFD and
 * wait on the fence returned by the scheduler.
 *
 * @kgd:      device handle; cast to struct amdgpu_device internally
 * @engine:   KGD_ENGINE_MEC1, KGD_ENGINE_SDMA1 or KGD_ENGINE_SDMA2
 * @vmid:     VMID stored in the job before scheduling
 * @gpu_addr: GPU address stored in the IB descriptor
 * @ib_cmd:   pointer stored in the IB descriptor's ptr field
 * @ib_len:   value stored in the IB descriptor's length_dw field
 *
 * Returns -EINVAL for any other engine value, the error from
 * amdgpu_job_alloc() or amdgpu_ib_schedule() if either fails, and otherwise
 * the result of dma_fence_wait(). The job is freed before returning on every
 * path that allocated it.
 */
int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
				uint32_t vmid, uint64_t gpu_addr,
				uint32_t *ib_cmd, uint32_t ib_len)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	struct dma_fence *fence = NULL;
	struct amdgpu_ring *ring;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	int ret;

	/* Select the ring that corresponds to the requested engine. */
	switch (engine) {
	case KGD_ENGINE_MEC1:
		ring = &adev->gfx.compute_ring[0];
		break;
	case KGD_ENGINE_SDMA1:
		ring = &adev->sdma.instance[0].ring;
		break;
	case KGD_ENGINE_SDMA2:
		ring = &adev->sdma.instance[1].ring;
		break;
	default:
		pr_err("Invalid engine in IB submission: %d\n", engine);
		return -EINVAL;
	}

	/* Nothing has been acquired yet, so a failure here needs no cleanup. */
	ret = amdgpu_job_alloc(adev, 1, &job, NULL);
	if (ret)
		return ret;

	/* Start from a zeroed descriptor and fill in only what we were given. */
	ib = &job->ibs[0];
	memset(ib, 0, sizeof(*ib));
	ib->gpu_addr = gpu_addr;
	ib->ptr = ib_cmd;
	ib->length_dw = ib_len;

	/* This works for NO_HWS. TODO: need to handle without knowing VMID */
	job->vmid = vmid;

	ret = amdgpu_ib_schedule(ring, 1, ib, job, &fence);
	if (ret) {
		DRM_ERROR("amdgpu: failed to schedule IB.\n");
	} else {
		ret = dma_fence_wait(fence, false);
	}

	/* fence is still NULL if scheduling failed before producing one. */
	dma_fence_put(fence);
	amdgpu_job_free(job);

	return ret;
}

/*
 * amdgpu_amdkfd_is_kfd_vmid - test whether a VMID is in the compute VMID set.
 *
 * @adev: device to query
 * @vmid: VMID to test
 *
 * Returns true only when adev->kfd is set and bit @vmid is set in
 * compute_vmid_bitmap. A @vmid that does not fit in the bitmap is reported
 * as "not a KFD VMID" rather than being shifted: shifting by the width of
 * the type or more (or into the sign bit of a signed int) is undefined
 * behavior in C.
 */
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
{
	if (!adev->kfd)
		return false;

	/* Reject bit positions that lie outside the bitmap's width. */
	if (vmid >= 8 * sizeof(compute_vmid_bitmap))
		return false;

	/* Unsigned shift: well defined for every remaining bit position. */
	return ((1U << vmid) & compute_vmid_bitmap) != 0;
}
112 changes: 110 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,89 @@
#include <linux/types.h>
#include <linux/mmu_context.h>
#include <kgd_kfd_interface.h>
#include <drm/ttm/ttm_execbuf_util.h>
#include "amdgpu_sync.h"
#include "amdgpu_vm.h"

extern const struct kgd2kfd_calls *kgd2kfd;

struct amdgpu_device;

/* struct kfd_bo_va_list - list entry that wraps one amdgpu_bo_va for KFD.
 * NOTE(review): presumably one entry exists per GPU VM that a kgd_mem is
 * mapped into, chained on kgd_mem.bo_va_list - confirm against
 * amdgpu_amdkfd_gpuvm.c.
 */
struct kfd_bo_va_list {
/* List linkage for this entry */
struct list_head bo_list;
/* The amdgpu BO/VA association this entry refers to */
struct amdgpu_bo_va *bo_va;
/* Opaque device pointer; presumably the owning kgd_dev - TODO confirm */
void *kgd_dev;
/* NOTE(review): looks like "currently mapped" state - confirm */
bool is_mapped;
/* NOTE(review): presumably the GPU virtual address of the mapping */
uint64_t va;
/* NOTE(review): presumably page-table entry flags for the mapping */
uint64_t pte_flags;
};

/* struct kgd_mem - KFD memory object descriptor wrapping an amdgpu_bo.
 * NOTE(review): field semantics below are inferred from names unless an
 * original comment states them - confirm against amdgpu_amdkfd_gpuvm.c.
 */
struct kgd_mem {
/* NOTE(review): presumably serializes access to this descriptor */
struct mutex lock;
/* Backing buffer object */
struct amdgpu_bo *bo;
/* NOTE(review): presumably GPU address / kernel CPU mapping of bo */
uint64_t gpu_addr;
void *cpu_ptr;
/* List of kfd_bo_va_list entries - TODO confirm element type */
struct list_head bo_va_list;
/* protected by amdkfd_process_info.lock */
struct ttm_validate_buffer validate_list;
struct ttm_validate_buffer resv_list;
/* NOTE(review): presumably the AMDGPU_GEM_DOMAIN_* placement of bo */
uint32_t domain;
/* NOTE(review): looks like a count of GPU mappings - confirm */
unsigned int mapped_to_gpu_memory;
uint64_t va;

uint32_t mapping_flags;

/* Process-wide KFD info this memory object is associated with */
struct amdkfd_process_info *process_info;

struct amdgpu_sync sync;

bool aql_queue;
};

/* KFD Memory Eviction */

/* struct amdgpu_amdkfd_fence - fence type that embeds a struct dma_fence
 * and carries an mm_struct pointer plus a timeline name.
 */
struct amdgpu_amdkfd_fence {
/* Embedded generic fence object */
struct dma_fence base;
/* NOTE(review): presumably the mm passed to amdgpu_amdkfd_fence_create() */
struct mm_struct *mm;
/* NOTE(review): presumably the lock handed to the dma_fence core - confirm */
spinlock_t lock;
/* Name buffer sized to hold a task comm string (TASK_COMM_LEN bytes) */
char timeline_name[TASK_COMM_LEN];
};

struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
struct mm_struct *mm);
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);

/* struct amdkfd_process_info - per-KFD-process bookkeeping: the list of the
 * process's VMs, the list of its KFD BOs, and its eviction fence.
 */
struct amdkfd_process_info {
/* List head of all VMs that belong to a KFD process */
struct list_head vm_list_head;
/* List head for all KFD BOs that belong to a KFD process. */
struct list_head kfd_bo_list;
/* Lock to protect kfd_bo_list */
struct mutex lock;

/* Number of VMs */
unsigned int n_vms;
/* Eviction Fence */
struct amdgpu_amdkfd_fence *eviction_fence;
};

/* struct amdkfd_vm -
 * For Memory Eviction KGD requires a mechanism to keep track of all KFD BOs
 * belonging to a KFD process. All the VMs belonging to the same process point
 * to the same amdkfd_process_info.
 */
struct amdkfd_vm {
/* Keep base as the first parameter for pointer compatibility between
 * amdkfd_vm and amdgpu_vm.
 */
struct amdgpu_vm base;

/* List node in amdkfd_process_info.vm_list_head */
struct list_head vm_list_node;

/* NOTE(review): presumably the device this VM was created on - confirm */
struct amdgpu_device *adev;
/* Points to the KFD process VM info */
struct amdkfd_process_info *process_info;

/* NOTE(review): presumably the physical address of the page directory */
uint64_t pd_phys_addr;
};

int amdgpu_amdkfd_init(void);
Expand All @@ -48,9 +124,15 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev);

int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr,
uint32_t *ib_cmd, uint32_t ib_len);

struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);

bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);

/* Shared API */
int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
void **mem_obj, uint64_t *gpu_addr,
Expand Down Expand Up @@ -79,4 +161,30 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
valid; \
})

/* GPUVM API */
int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
void **process_info,
struct dma_fence **ef);
void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm);
uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct kgd_dev *kgd, uint64_t va, uint64_t size,
void *vm, struct kgd_mem **mem,
uint64_t *offset, uint32_t flags);
int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
struct kgd_dev *kgd, struct kgd_mem *mem);
int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
int amdgpu_amdkfd_gpuvm_sync_memory(
struct kgd_dev *kgd, struct kgd_mem *mem, bool intr);
int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
struct kgd_mem *mem, void **kptr, uint64_t *size);
int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
struct dma_fence **ef);

void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo);

#endif /* AMDGPU_AMDKFD_H_INCLUDED */
Loading

0 comments on commit 6fa7324

Please sign in to comment.