drm/amdkfd: Introduce kfd_node struct (v5)
Introduce a new structure, kfd_node, which now represents a compute
node. kfd_node is carved out of the kfd_dev structure: kfd_dev becomes
the parent of kfd_node and holds the resources common to all compute
nodes, such as the doorbells and the GTT sub-allocator, while kfd_node
holds the resources specific to a single compute node, such as the
device queue manager and interrupt handling.

This is the first step in adding compute partition support in KFD.
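
For orientation, a minimal sketch of the resulting layout (not the
actual kernel definitions): only the adev/kfd pointers and the shared
members referenced in the diff below (shared_resources, local_mem_info,
hive_id, use_iommu_v2) are taken from the patch; the remaining names
and the choice of which members are shown are illustrative assumptions.

#include <stdbool.h>
#include <stdint.h>

/* Stand-ins for the real amdgpu/KFD types. */
struct amdgpu_device;
struct device_queue_manager;

struct kgd2kfd_shared_resources {
	bool enable_mes;		/* read via dev->kfd->shared_resources.enable_mes */
	/* ... */
};

struct kfd_local_mem_info {
	uint64_t local_mem_size_private;	/* used by kfd_dev_is_large_bar() */
	uint64_t local_mem_size_public;
	/* ... */
};

/* Parent: one per GPU; keeps the resources common to all compute nodes
 * (doorbells, GTT sub-allocator, local memory info, hive id, ...). */
struct kfd_dev {
	struct kgd2kfd_shared_resources shared_resources;
	struct kfd_local_mem_info local_mem_info;
	uint64_t hive_id;
	bool use_iommu_v2;
	/* doorbell and GTT sub-allocator state, ... */
};

/* Per compute node: queue management, interrupt handling and other
 * node-local state. */
struct kfd_node {
	struct amdgpu_device *adev;	/* owning amdgpu device */
	struct kfd_dev *kfd;		/* back-pointer to the shared parent */
	struct device_queue_manager *dqm;	/* per-node queue manager (name assumed) */
	/* interrupt rings, MQD managers, ... */
};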

v2: introduce kfd_node struct to gc v11 (Hawking)
v3: make reference to kfd_dev struct through kfd_node (Morris)
v4: use kfd_node instead for kfd isr/mqd functions (Morris)
v5: rebase (Alex)

Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Tested-by: Amber Lin <Amber.Lin@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Morris Zhang <Shiwu.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Mukul Joshi authored and Alex Deucher committed Jun 9, 2023
1 parent 5cf1675 commit 8dc1db3
Showing 38 changed files with 573 additions and 495 deletions.
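
Most of the churn across these files follows a single mechanical
pattern: functions that took a struct kfd_dev * now take a struct
kfd_node *, and state that moved to the parent is reached through the
node's kfd back-pointer. A hedged before/after sketch, reusing the
illustrative definitions above (function names here are invented for
the comparison; the concrete instance in the hunks below is
kfd_dev_is_large_bar() in kfd_chardev.c):

/* Before: shared state lived directly in struct kfd_dev. */
bool is_large_bar_before(struct kfd_dev *dev)
{
	return dev->local_mem_info.local_mem_size_private == 0 &&
	       dev->local_mem_info.local_mem_size_public > 0;
}

/* After: callers hold a struct kfd_node and go through dev->kfd. */
bool is_large_bar_after(struct kfd_node *dev)
{
	return dev->kfd->local_mem_info.local_mem_size_private == 0 &&
	       dev->kfd->local_mem_info.local_mem_size_public > 0;
}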
1 change: 1 addition & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -35,6 +35,7 @@
#include "amdgpu_dma_buf.h"
#include <uapi/linux/kfd_ioctl.h>
#include "amdgpu_xgmi.h"
#include "kfd_priv.h"
#include "kfd_smi_events.h"
#include <drm/ttm/ttm_tt.h>

4 changes: 2 additions & 2 deletions drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
@@ -26,7 +26,7 @@
#include "amdgpu_amdkfd.h"
#include "kfd_smi_events.h"

static bool cik_event_interrupt_isr(struct kfd_dev *dev,
static bool cik_event_interrupt_isr(struct kfd_node *dev,
const uint32_t *ih_ring_entry,
uint32_t *patched_ihre,
bool *patched_flag)
@@ -85,7 +85,7 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev,
!amdgpu_no_queue_eviction_on_vm_fault);
}

static void cik_event_interrupt_wq(struct kfd_dev *dev,
static void cik_event_interrupt_wq(struct kfd_node *dev,
const uint32_t *ih_ring_entry)
{
const struct cik_ih_ring_entry *ihre =
43 changes: 22 additions & 21 deletions drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -293,7 +293,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
void *data)
{
struct kfd_ioctl_create_queue_args *args = data;
struct kfd_dev *dev;
struct kfd_node *dev;
int err = 0;
unsigned int queue_id;
struct kfd_process_device *pdd;
@@ -328,15 +328,15 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
}

if (!pdd->doorbell_index &&
kfd_alloc_process_doorbells(dev, &pdd->doorbell_index) < 0) {
kfd_alloc_process_doorbells(dev->kfd, &pdd->doorbell_index) < 0) {
err = -ENOMEM;
goto err_alloc_doorbells;
}

/* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work
* on unmapped queues for usermode queue oversubscription (no aggregated doorbell)
*/
if (dev->shared_resources.enable_mes &&
if (dev->kfd->shared_resources.enable_mes &&
((dev->adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK)
>> AMDGPU_MES_API_VERSION_SHIFT) >= 2) {
struct amdgpu_bo_va_mapping *wptr_mapping;
@@ -887,7 +887,7 @@ static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
{
struct kfd_ioctl_set_scratch_backing_va_args *args = data;
struct kfd_process_device *pdd;
struct kfd_dev *dev;
struct kfd_node *dev;
long err;

mutex_lock(&p->mutex);
@@ -1006,18 +1006,18 @@ static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
return ret;
}

bool kfd_dev_is_large_bar(struct kfd_dev *dev)
bool kfd_dev_is_large_bar(struct kfd_node *dev)
{
if (debug_largebar) {
pr_debug("Simulate large-bar allocation on non large-bar machine\n");
return true;
}

if (dev->use_iommu_v2)
if (dev->kfd->use_iommu_v2)
return false;

if (dev->local_mem_info.local_mem_size_private == 0 &&
dev->local_mem_info.local_mem_size_public > 0)
if (dev->kfd->local_mem_info.local_mem_size_private == 0 &&
dev->kfd->local_mem_info.local_mem_size_public > 0)
return true;
return false;
}
@@ -1041,7 +1041,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
struct kfd_process_device *pdd;
void *mem;
struct kfd_dev *dev;
struct kfd_node *dev;
int idr_handle;
long err;
uint64_t offset = args->mmap_offset;
@@ -1105,7 +1105,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
}

if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
if (args->size != kfd_doorbell_process_slice(dev)) {
if (args->size != kfd_doorbell_process_slice(dev->kfd)) {
err = -EINVAL;
goto err_unlock;
}
@@ -1231,7 +1231,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
struct kfd_ioctl_map_memory_to_gpu_args *args = data;
struct kfd_process_device *pdd, *peer_pdd;
void *mem;
struct kfd_dev *dev;
struct kfd_node *dev;
long err = 0;
int i;
uint32_t *devices_arr = NULL;
@@ -1405,7 +1405,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
args->n_success = i+1;
}

flush_tlb = kfd_flush_tlb_after_unmap(pdd->dev);
flush_tlb = kfd_flush_tlb_after_unmap(pdd->dev->kfd);
if (flush_tlb) {
err = amdgpu_amdkfd_gpuvm_sync_memory(pdd->dev->adev,
(struct kgd_mem *) mem, true);
@@ -1445,7 +1445,7 @@ static int kfd_ioctl_alloc_queue_gws(struct file *filep,
int retval;
struct kfd_ioctl_alloc_queue_gws_args *args = data;
struct queue *q;
struct kfd_dev *dev;
struct kfd_node *dev;

mutex_lock(&p->mutex);
q = pqm_get_user_queue(&p->pqm, args->queue_id);
@@ -1482,7 +1482,7 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep,
struct kfd_process *p, void *data)
{
struct kfd_ioctl_get_dmabuf_info_args *args = data;
struct kfd_dev *dev = NULL;
struct kfd_node *dev = NULL;
struct amdgpu_device *dmabuf_adev;
void *metadata_buffer = NULL;
uint32_t flags;
@@ -1596,7 +1596,7 @@ static int kfd_ioctl_export_dmabuf(struct file *filep,
struct kfd_ioctl_export_dmabuf_args *args = data;
struct kfd_process_device *pdd;
struct dma_buf *dmabuf;
struct kfd_dev *dev;
struct kfd_node *dev;
void *mem;
int ret = 0;

@@ -2178,7 +2178,7 @@ static int criu_restore_devices(struct kfd_process *p,
}

for (i = 0; i < args->num_devices; i++) {
struct kfd_dev *dev;
struct kfd_node *dev;
struct kfd_process_device *pdd;
struct file *drm_file;

@@ -2240,7 +2240,7 @@ static int criu_restore_devices(struct kfd_process *p,
}

if (!pdd->doorbell_index &&
kfd_alloc_process_doorbells(pdd->dev, &pdd->doorbell_index) < 0) {
kfd_alloc_process_doorbells(pdd->dev->kfd, &pdd->doorbell_index) < 0) {
ret = -ENOMEM;
goto exit;
}
@@ -2268,7 +2268,8 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
u64 offset;

if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
if (bo_bucket->size != kfd_doorbell_process_slice(pdd->dev))
if (bo_bucket->size !=
kfd_doorbell_process_slice(pdd->dev->kfd))
return -EINVAL;

offset = kfd_get_process_doorbells(pdd);
@@ -2350,7 +2351,7 @@ static int criu_restore_bo(struct kfd_process *p,

/* now map these BOs to GPU/s */
for (j = 0; j < p->n_pdds; j++) {
struct kfd_dev *peer;
struct kfd_node *peer;
struct kfd_process_device *peer_pdd;

if (!bo_priv->mapped_gpuids[j])
@@ -2947,7 +2948,7 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
return retcode;
}

static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process,
static int kfd_mmio_mmap(struct kfd_node *dev, struct kfd_process *process,
struct vm_area_struct *vma)
{
phys_addr_t address;
@@ -2981,7 +2982,7 @@ static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process,
static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct kfd_process *process;
struct kfd_dev *dev = NULL;
struct kfd_node *dev = NULL;
unsigned long mmap_offset;
unsigned int gpu_id;

28 changes: 14 additions & 14 deletions drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1405,7 +1405,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
return i;
}

int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pcache_info)
int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info)
{
int num_of_cache_types = 0;

@@ -1524,7 +1524,7 @@ int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pca
case IP_VERSION(11, 0, 3):
case IP_VERSION(11, 0, 4):
num_of_cache_types =
kfd_fill_gpu_cache_info_from_gfx_config(kdev, *pcache_info);
kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, *pcache_info);
break;
default:
*pcache_info = dummy_cache_info;
@@ -1858,7 +1858,7 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
}

static int kfd_fill_gpu_memory_affinity(int *avail_size,
struct kfd_dev *kdev, uint8_t type, uint64_t size,
struct kfd_node *kdev, uint8_t type, uint64_t size,
struct crat_subtype_memory *sub_type_hdr,
uint32_t proximity_domain,
const struct kfd_local_mem_info *local_mem_info)
@@ -1887,7 +1887,7 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
}

#ifdef CONFIG_ACPI_NUMA
static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev)
static void kfd_find_numa_node_in_srat(struct kfd_node *kdev)
{
struct acpi_table_header *table_header = NULL;
struct acpi_subtable_header *sub_header = NULL;
@@ -1982,7 +1982,7 @@ static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev)
* Return 0 if successful else return -ve value
*/
static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
struct kfd_dev *kdev,
struct kfd_node *kdev,
struct crat_subtype_iolink *sub_type_hdr,
uint32_t proximity_domain)
{
@@ -2044,8 +2044,8 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
}

static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
struct kfd_dev *kdev,
struct kfd_dev *peer_kdev,
struct kfd_node *kdev,
struct kfd_node *peer_kdev,
struct crat_subtype_iolink *sub_type_hdr,
uint32_t proximity_domain_from,
uint32_t proximity_domain_to)
@@ -2081,7 +2081,7 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
* [OUT] actual size of data filled in crat_image
*/
static int kfd_create_vcrat_image_gpu(void *pcrat_image,
size_t *size, struct kfd_dev *kdev,
size_t *size, struct kfd_node *kdev,
uint32_t proximity_domain)
{
struct crat_header *crat_table = (struct crat_header *)pcrat_image;
@@ -2153,7 +2153,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
/* Check if this node supports IOMMU. During parsing this flag will
* translate to HSA_CAP_ATS_PRESENT
*/
if (!kfd_iommu_check_device(kdev))
if (!kfd_iommu_check_device(kdev->kfd))
cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT;

crat_table->length += sub_type_hdr->length;
@@ -2164,7 +2164,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
* report the total FB size (public+private) as a single
* private heap.
*/
local_mem_info = kdev->local_mem_info;
local_mem_info = kdev->kfd->local_mem_info;
sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
sub_type_hdr->length);

@@ -2216,12 +2216,12 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
* (from other GPU to this GPU) will be added
* in kfd_parse_subtype_iolink.
*/
if (kdev->hive_id) {
if (kdev->kfd->hive_id) {
for (nid = 0; nid < proximity_domain; ++nid) {
peer_dev = kfd_topology_device_by_proximity_domain_no_lock(nid);
if (!peer_dev->gpu)
continue;
if (peer_dev->gpu->hive_id != kdev->hive_id)
if (peer_dev->gpu->kfd->hive_id != kdev->kfd->hive_id)
continue;
sub_type_hdr = (typeof(sub_type_hdr))(
(char *)sub_type_hdr +
@@ -2255,12 +2255,12 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
* (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU
* -- this option is not currently implemented.
* The assumption is that all AMD APUs will have CRAT
* @kdev: Valid kfd_device required if flags contain COMPUTE_UNIT_GPU
* @kdev: Valid kfd_node required if flags contain COMPUTE_UNIT_GPU
*
* Return 0 if successful else return -ve value
*/
int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
int flags, struct kfd_dev *kdev,
int flags, struct kfd_node *kdev,
uint32_t proximity_domain)
{
void *pcrat_image = NULL;
6 changes: 3 additions & 3 deletions drivers/gpu/drm/amd/amdkfd/kfd_crat.h
@@ -293,7 +293,7 @@ struct crat_subtype_generic {

#pragma pack()

struct kfd_dev;
struct kfd_node;

/* Static table to describe GPU Cache information */
struct kfd_gpu_cache_info {
@@ -305,14 +305,14 @@ struct kfd_gpu_cache_info {
*/
uint32_t num_cu_shared;
};
int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pcache_info);
int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info);

int kfd_create_crat_image_acpi(void **crat_image, size_t *size);
void kfd_destroy_crat_image(void *crat_image);
int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
uint32_t proximity_domain);
int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
int flags, struct kfd_dev *kdev,
int flags, struct kfd_node *kdev,
uint32_t proximity_domain);

#endif /* KFD_CRAT_H_INCLUDED */
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
@@ -43,7 +43,7 @@ static int kfd_debugfs_hang_hws_read(struct seq_file *m, void *data)
static ssize_t kfd_debugfs_hang_hws_write(struct file *file,
const char __user *user_buf, size_t size, loff_t *ppos)
{
struct kfd_dev *dev;
struct kfd_node *dev;
char tmp[16];
uint32_t gpu_id;
int ret = -EINVAL;
(Diffs for the remaining changed files are not shown here.)
